kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 11 | #ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_ |
| 12 | #define API_AUDIO_CODECS_AUDIO_DECODER_H_ |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 13 | |
Yves Gerey | 988cc08 | 2018-10-23 10:03:01 | [diff] [blame] | 14 | #include <stddef.h> |
| 15 | #include <stdint.h> |
Jonas Olsson | a4d8737 | 2019-07-05 17:08:33 | [diff] [blame] | 16 | |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 17 | #include <memory> |
Florent Castelli | 8037fc6 | 2024-08-29 13:00:40 | [diff] [blame] | 18 | #include <optional> |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 19 | #include <vector> |
| 20 | |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 21 | #include "api/array_view.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 22 | #include "rtc_base/buffer.h" |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 23 | |
| 24 | namespace webrtc { |
| 25 | |
// Abstract interface for audio decoders. Subclasses must implement the pure
// virtual methods Reset(), SampleRateHz(), Channels() and DecodeInternal().
// The other virtual methods are not pure, so their default implementations
// are defined outside this header. Instances cannot be copied.
| 26 | class AudioDecoder { |
| 27 | public: |
// Kind of audio a decode call produced; reported through `speech_type` in
// Decode() and through EncodedAudioFrame::DecodeResult.
| 28 | enum SpeechType { |
| 29 | kSpeech = 1, |
| 30 | kComfortNoise = 2, |
| 31 | }; |
| 32 | |
| 33 | // Returned by PacketDuration below when no estimate is available. The value -1 is reserved for errors. |
| 34 | enum { kNotImplemented = -2 }; |
| 35 | |
| 36 | AudioDecoder() = default; |
| 37 | virtual ~AudioDecoder() = default; |
| 38 | |
Byoungchan Lee | c065e73 | 2022-01-18 00:35:48 | [diff] [blame] | 39 | AudioDecoder(const AudioDecoder&) = delete; |
| 40 | AudioDecoder& operator=(const AudioDecoder&) = delete; |
| 41 | |
// Interface for one decodable frame of encoded audio. ParsePayload() hands
// these out wrapped in ParseResult objects.
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 42 | class EncodedAudioFrame { |
| 43 | public: |
// Value carried by the std::optional that Decode() returns on success: the
// total number of decoded samples and the kind of audio produced.
| 44 | struct DecodeResult { |
| 45 | size_t num_decoded_samples; |
| 46 | SpeechType speech_type; |
| 47 | }; |
| 48 | |
| 49 | virtual ~EncodedAudioFrame() = default; |
| 50 | |
| 51 | // Returns the duration in samples-per-channel of this audio frame. |
| 52 | // If no duration can be ascertained, returns zero. |
| 53 | virtual size_t Duration() const = 0; |
| 54 | |
Ivo Creusen | c7f09ad | 2018-05-22 11:21:01 | [diff] [blame] | 55 | // Returns true if this packet contains DTX. |
| 56 | virtual bool IsDtxPacket() const; |
| 57 | |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 58 | // Decodes this frame of audio and writes the result in `decoded`. |
| 59 | // `decoded` must be large enough to store as many samples as indicated by a |
Florent Castelli | 8037fc6 | 2024-08-29 13:00:40 | [diff] [blame] | 60 | // call to Duration(). On success, returns an std::optional containing the |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 61 | // total number of samples across all channels, as well as whether the |
| 62 | // decoder produced comfort noise or speech. On failure, returns an empty |
Florent Castelli | 8037fc6 | 2024-08-29 13:00:40 | [diff] [blame] | 63 | // std::optional. Decode may be called at most once per frame object. |
| 64 | virtual std::optional<DecodeResult> Decode( |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 65 | rtc::ArrayView<int16_t> decoded) const = 0; |
| 66 | }; |
| 67 | |
// One entry of the vector returned by ParsePayload(): a decodable frame
// together with its timestamp and priority. Only move construction and move
// assignment are declared, so a ParseResult can be moved but not copied.
| 68 | struct ParseResult { |
| 69 | ParseResult(); |
| 70 | ParseResult(uint32_t timestamp, |
| 71 | int priority, |
| 72 | std::unique_ptr<EncodedAudioFrame> frame); |
| 73 | ParseResult(ParseResult&& b); |
| 74 | ~ParseResult(); |
| 75 | |
| 76 | ParseResult& operator=(ParseResult&& b); |
| 77 | |
| 78 | // The timestamp of the frame is in samples per channel. |
| 79 | uint32_t timestamp; |
| 80 | // The relative priority of the frame compared to other frames of the same |
| 81 | // payload and the same timeframe. A higher value means a lower priority. |
| 82 | // The highest priority is zero - negative values are not allowed. |
| 83 | int priority; |
| 84 | std::unique_ptr<EncodedAudioFrame> frame; |
| 85 | }; |
| 86 | |
| 87 | // Let the decoder parse this payload and prepare zero or more decodable |
| 88 | // frames. Each frame must be between 10 ms and 120 ms long. The caller must |
| 89 | // ensure that the AudioDecoder object outlives any frame objects returned by |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 90 | // this call. The decoder is free to swap or move the data from the `payload` |
| 91 | // buffer. `timestamp` is the input timestamp, in samples, corresponding to |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 92 | // the start of the payload. |
| 93 | virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, |
| 94 | uint32_t timestamp); |
| 95 | |
Niels Möller | b7180c0 | 2018-12-06 12:07:11 | [diff] [blame] | 96 | // TODO(bugs.webrtc.org/10098): The Decode and DecodeRedundant methods are |
| 97 | // obsolete; callers should call ParsePayload instead. For now, subclasses |
| 98 | // must still implement DecodeInternal. |
| 99 | |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 100 | // Decodes `encoded_len` bytes from `encoded` and writes the result in |
| 101 | // `decoded`. The maximum number of bytes allowed to be written into `decoded` is |
| 102 | // `max_decoded_bytes`. Returns the total number of samples across all |
| 103 | // channels. If the decoder produced comfort noise, `speech_type` |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 104 | // is set to kComfortNoise, otherwise it is kSpeech. The desired output |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 105 | // sample rate is provided in `sample_rate_hz`, which must be valid for the |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 106 | // codec at hand. |
| 107 | int Decode(const uint8_t* encoded, |
| 108 | size_t encoded_len, |
| 109 | int sample_rate_hz, |
| 110 | size_t max_decoded_bytes, |
| 111 | int16_t* decoded, |
| 112 | SpeechType* speech_type); |
| 113 | |
| 114 | // Same as Decode(), but interfaces to the decoder's redundant decode function. |
| 115 | // The default implementation simply calls the regular Decode() method. |
| 116 | int DecodeRedundant(const uint8_t* encoded, |
| 117 | size_t encoded_len, |
| 118 | int sample_rate_hz, |
| 119 | size_t max_decoded_bytes, |
| 120 | int16_t* decoded, |
| 121 | SpeechType* speech_type); |
| 122 | |
| 123 | // Indicates if the decoder implements the DecodePlc method. |
| 124 | virtual bool HasDecodePlc() const; |
| 125 | |
| 126 | // Calls the packet-loss concealment of the decoder to update the state after |
| 127 | // one or several lost packets. The caller must ensure that the |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 128 | // memory allocated in `decoded` can accommodate `num_frames` frames. |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 129 | virtual size_t DecodePlc(size_t num_frames, int16_t* decoded); |
| 130 | |
Henrik Lundin | 00eb12a | 2018-09-05 16:14:52 | [diff] [blame] | 131 | // Asks the decoder to generate packet-loss concealment and append it to the |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 132 | // end of `concealment_audio`. The concealment audio should be in |
Henrik Lundin | 00eb12a | 2018-09-05 16:14:52 | [diff] [blame] | 133 | // channel-interleaved format, with as many channels as the last decoded |
| 134 | // packet produced. The implementation must produce at least |
| 135 | // `requested_samples_per_channel` samples per channel, or nothing at all. |
| 136 | // Producing nothing signals the caller to conceal the loss by other means. If |
| 137 | // the implementation provides concealment samples, it is also responsible for |
| 138 | // "stitching" them together with the decoded audio on either side of the concealment. |
| 139 | // Note: The default implementation of GeneratePlc will be deleted soon. All |
| 140 | // implementations must provide their own, which can be as simple as a no-op. |
Pablo Barrera González | ff0e01f | 2021-02-10 09:38:50 | [diff] [blame] | 141 | // TODO(bugs.webrtc.org/9676): Remove default implementation. |
Henrik Lundin | 00eb12a | 2018-09-05 16:14:52 | [diff] [blame] | 142 | virtual void GeneratePlc(size_t requested_samples_per_channel, |
| 143 | rtc::BufferT<int16_t>* concealment_audio); |
| 144 | |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 145 | // Resets the decoder state (empty buffers etc.). |
| 146 | virtual void Reset() = 0; |
| 147 | |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 148 | // Returns the last error code from the decoder. |
| 149 | virtual int ErrorCode(); |
| 150 | |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 151 | // Returns the duration in samples-per-channel of the payload in `encoded` |
| 152 | // which is `encoded_len` bytes long. Returns kNotImplemented if no duration |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 153 | // estimate is available, or -1 in case of an error. |
| 154 | virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; |
| 155 | |
| 156 | // Returns the duration in samples-per-channel of the redundant payload in |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 157 | // `encoded` which is `encoded_len` bytes long. Returns kNotImplemented if no |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 158 | // duration estimate is available, or -1 in case of an error. |
| 159 | virtual int PacketDurationRedundant(const uint8_t* encoded, |
| 160 | size_t encoded_len) const; |
| 161 | |
| 162 | // Detects whether a packet has forward error correction. The packet |
Artem Titov | 0e61fdd | 2021-07-25 19:50:14 | [diff] [blame] | 163 | // consists of the data in `encoded`, which is `encoded_len` bytes long. |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 164 | // Returns true if the packet has FEC and false otherwise. |
| 165 | virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const; |
| 166 | |
| 167 | // Returns the actual sample rate of the decoder's output. This value may not |
| 168 | // change during the lifetime of the decoder. |
| 169 | virtual int SampleRateHz() const = 0; |
| 170 | |
| 171 | // The number of channels in the decoder's output. This value may not change |
| 172 | // during the lifetime of the decoder. |
| 173 | virtual size_t Channels() const = 0; |
| 174 | |
Ivo Creusen | d823259 | 2021-11-16 15:11:28 | [diff] [blame] | 175 | // The maximum number of audio channels supported by WebRTC decoders. |
| 176 | static constexpr int kMaxNumberOfChannels = 24; |
| 177 | |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 178 | protected: |
// Converts a raw int16_t type code into a SpeechType.
// NOTE(review): the accepted codes and their mapping are defined with the
// implementation, which is not visible in this header.
| 179 | static SpeechType ConvertSpeechType(int16_t type); |
| 180 | |
// Codec-specific decoding hook. It is pure virtual, so every subclass must
// implement it. It takes the same arguments as Decode() except
// `max_decoded_bytes`.
// NOTE(review): presumably called by Decode(); the call site is outside this
// header - confirm.
| 181 | virtual int DecodeInternal(const uint8_t* encoded, |
| 182 | size_t encoded_len, |
| 183 | int sample_rate_hz, |
| 184 | int16_t* decoded, |
| 185 | SpeechType* speech_type) = 0; |
| 186 | |
// Same signature as DecodeInternal, but not pure virtual, so overriding it is
// optional.
// NOTE(review): presumably the hook behind DecodeRedundant(); the default
// behavior is defined outside this header - confirm.
| 187 | virtual int DecodeRedundantInternal(const uint8_t* encoded, |
| 188 | size_t encoded_len, |
| 189 | int sample_rate_hz, |
| 190 | int16_t* decoded, |
| 191 | SpeechType* speech_type); |
kwiberg | 087bd34 | 2017-02-10 16:15:44 | [diff] [blame] | 192 | }; |
| 193 | |
| 194 | } // namespace webrtc |
Mirko Bonadei | 92ea95e | 2017-09-15 04:47:31 | [diff] [blame] | 195 | #endif // API_AUDIO_CODECS_AUDIO_DECODER_H_ |