/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 04:47:3111#ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_
12#define API_AUDIO_CODECS_AUDIO_DECODER_H_
kwiberg087bd342017-02-10 16:15:4413
Yves Gerey988cc082018-10-23 10:03:0114#include <stddef.h>
15#include <stdint.h>
Jonas Olssona4d87372019-07-05 17:08:3316
kwiberg087bd342017-02-10 16:15:4417#include <memory>
Florent Castelli8037fc62024-08-29 13:00:4018#include <optional>
kwiberg087bd342017-02-10 16:15:4419#include <vector>
20
Mirko Bonadei92ea95e2017-09-15 04:47:3121#include "api/array_view.h"
Mirko Bonadei92ea95e2017-09-15 04:47:3122#include "rtc_base/buffer.h"
kwiberg087bd342017-02-10 16:15:4423
24namespace webrtc {
25
26class AudioDecoder {
27 public:
28 enum SpeechType {
29 kSpeech = 1,
30 kComfortNoise = 2,
31 };
32
33 // Used by PacketDuration below. Save the value -1 for errors.
34 enum { kNotImplemented = -2 };
35
36 AudioDecoder() = default;
37 virtual ~AudioDecoder() = default;
38
Byoungchan Leec065e732022-01-18 00:35:4839 AudioDecoder(const AudioDecoder&) = delete;
40 AudioDecoder& operator=(const AudioDecoder&) = delete;
41
kwiberg087bd342017-02-10 16:15:4442 class EncodedAudioFrame {
43 public:
44 struct DecodeResult {
45 size_t num_decoded_samples;
46 SpeechType speech_type;
47 };
48
49 virtual ~EncodedAudioFrame() = default;
50
51 // Returns the duration in samples-per-channel of this audio frame.
52 // If no duration can be ascertained, returns zero.
53 virtual size_t Duration() const = 0;
54
Ivo Creusenc7f09ad2018-05-22 11:21:0155 // Returns true if this packet contains DTX.
56 virtual bool IsDtxPacket() const;
57
Artem Titov0e61fdd2021-07-25 19:50:1458 // Decodes this frame of audio and writes the result in `decoded`.
59 // `decoded` must be large enough to store as many samples as indicated by a
Florent Castelli8037fc62024-08-29 13:00:4060 // call to Duration() . On success, returns an std::optional containing the
kwiberg087bd342017-02-10 16:15:4461 // total number of samples across all channels, as well as whether the
62 // decoder produced comfort noise or speech. On failure, returns an empty
Florent Castelli8037fc62024-08-29 13:00:4063 // std::optional. Decode may be called at most once per frame object.
64 virtual std::optional<DecodeResult> Decode(
kwiberg087bd342017-02-10 16:15:4465 rtc::ArrayView<int16_t> decoded) const = 0;
66 };
67
68 struct ParseResult {
69 ParseResult();
70 ParseResult(uint32_t timestamp,
71 int priority,
72 std::unique_ptr<EncodedAudioFrame> frame);
73 ParseResult(ParseResult&& b);
74 ~ParseResult();
75
76 ParseResult& operator=(ParseResult&& b);
77
78 // The timestamp of the frame is in samples per channel.
79 uint32_t timestamp;
80 // The relative priority of the frame compared to other frames of the same
81 // payload and the same timeframe. A higher value means a lower priority.
82 // The highest priority is zero - negative values are not allowed.
83 int priority;
84 std::unique_ptr<EncodedAudioFrame> frame;
85 };
86
87 // Let the decoder parse this payload and prepare zero or more decodable
88 // frames. Each frame must be between 10 ms and 120 ms long. The caller must
89 // ensure that the AudioDecoder object outlives any frame objects returned by
Artem Titov0e61fdd2021-07-25 19:50:1490 // this call. The decoder is free to swap or move the data from the `payload`
91 // buffer. `timestamp` is the input timestamp, in samples, corresponding to
kwiberg087bd342017-02-10 16:15:4492 // the start of the payload.
93 virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
94 uint32_t timestamp);
95
Niels Möllerb7180c02018-12-06 12:07:1196 // TODO(bugs.webrtc.org/10098): The Decode and DecodeRedundant methods are
97 // obsolete; callers should call ParsePayload instead. For now, subclasses
98 // must still implement DecodeInternal.
99
Artem Titov0e61fdd2021-07-25 19:50:14100 // Decodes `encode_len` bytes from `encoded` and writes the result in
101 // `decoded`. The maximum bytes allowed to be written into `decoded` is
102 // `max_decoded_bytes`. Returns the total number of samples across all
103 // channels. If the decoder produced comfort noise, `speech_type`
kwiberg087bd342017-02-10 16:15:44104 // is set to kComfortNoise, otherwise it is kSpeech. The desired output
Artem Titov0e61fdd2021-07-25 19:50:14105 // sample rate is provided in `sample_rate_hz`, which must be valid for the
kwiberg087bd342017-02-10 16:15:44106 // codec at hand.
107 int Decode(const uint8_t* encoded,
108 size_t encoded_len,
109 int sample_rate_hz,
110 size_t max_decoded_bytes,
111 int16_t* decoded,
112 SpeechType* speech_type);
113
114 // Same as Decode(), but interfaces to the decoders redundant decode function.
115 // The default implementation simply calls the regular Decode() method.
116 int DecodeRedundant(const uint8_t* encoded,
117 size_t encoded_len,
118 int sample_rate_hz,
119 size_t max_decoded_bytes,
120 int16_t* decoded,
121 SpeechType* speech_type);
122
123 // Indicates if the decoder implements the DecodePlc method.
124 virtual bool HasDecodePlc() const;
125
126 // Calls the packet-loss concealment of the decoder to update the state after
127 // one or several lost packets. The caller has to make sure that the
Artem Titov0e61fdd2021-07-25 19:50:14128 // memory allocated in `decoded` should accommodate `num_frames` frames.
kwiberg087bd342017-02-10 16:15:44129 virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
130
Henrik Lundin00eb12a2018-09-05 16:14:52131 // Asks the decoder to generate packet-loss concealment and append it to the
Artem Titov0e61fdd2021-07-25 19:50:14132 // end of `concealment_audio`. The concealment audio should be in
Henrik Lundin00eb12a2018-09-05 16:14:52133 // channel-interleaved format, with as many channels as the last decoded
134 // packet produced. The implementation must produce at least
135 // requested_samples_per_channel, or nothing at all. This is a signal to the
136 // caller to conceal the loss with other means. If the implementation provides
137 // concealment samples, it is also responsible for "stitching" it together
138 // with the decoded audio on either side of the concealment.
139 // Note: The default implementation of GeneratePlc will be deleted soon. All
140 // implementations must provide their own, which can be a simple as a no-op.
Pablo Barrera Gonzálezff0e01f2021-02-10 09:38:50141 // TODO(bugs.webrtc.org/9676): Remove default implementation.
Henrik Lundin00eb12a2018-09-05 16:14:52142 virtual void GeneratePlc(size_t requested_samples_per_channel,
143 rtc::BufferT<int16_t>* concealment_audio);
144
kwiberg087bd342017-02-10 16:15:44145 // Resets the decoder state (empty buffers etc.).
146 virtual void Reset() = 0;
147
kwiberg087bd342017-02-10 16:15:44148 // Returns the last error code from the decoder.
149 virtual int ErrorCode();
150
Artem Titov0e61fdd2021-07-25 19:50:14151 // Returns the duration in samples-per-channel of the payload in `encoded`
152 // which is `encoded_len` bytes long. Returns kNotImplemented if no duration
kwiberg087bd342017-02-10 16:15:44153 // estimate is available, or -1 in case of an error.
154 virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
155
156 // Returns the duration in samples-per-channel of the redandant payload in
Artem Titov0e61fdd2021-07-25 19:50:14157 // `encoded` which is `encoded_len` bytes long. Returns kNotImplemented if no
kwiberg087bd342017-02-10 16:15:44158 // duration estimate is available, or -1 in case of an error.
159 virtual int PacketDurationRedundant(const uint8_t* encoded,
160 size_t encoded_len) const;
161
162 // Detects whether a packet has forward error correction. The packet is
Artem Titov0e61fdd2021-07-25 19:50:14163 // comprised of the samples in `encoded` which is `encoded_len` bytes long.
kwiberg087bd342017-02-10 16:15:44164 // Returns true if the packet has FEC and false otherwise.
165 virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
166
167 // Returns the actual sample rate of the decoder's output. This value may not
168 // change during the lifetime of the decoder.
169 virtual int SampleRateHz() const = 0;
170
171 // The number of channels in the decoder's output. This value may not change
172 // during the lifetime of the decoder.
173 virtual size_t Channels() const = 0;
174
Ivo Creusend8232592021-11-16 15:11:28175 // The maximum number of audio channels supported by WebRTC decoders.
176 static constexpr int kMaxNumberOfChannels = 24;
177
kwiberg087bd342017-02-10 16:15:44178 protected:
179 static SpeechType ConvertSpeechType(int16_t type);
180
181 virtual int DecodeInternal(const uint8_t* encoded,
182 size_t encoded_len,
183 int sample_rate_hz,
184 int16_t* decoded,
185 SpeechType* speech_type) = 0;
186
187 virtual int DecodeRedundantInternal(const uint8_t* encoded,
188 size_t encoded_len,
189 int sample_rate_hz,
190 int16_t* decoded,
191 SpeechType* speech_type);
kwiberg087bd342017-02-10 16:15:44192};
193
194} // namespace webrtc
Mirko Bonadei92ea95e2017-09-15 04:47:31195#endif // API_AUDIO_CODECS_AUDIO_DECODER_H_