Blame - api/audio_codecs/audio_decoder.h - src.git

blob: 21d60566f43e33d72ce6cdbef848e8d88ad4b8f3 [file] [log] [blame]

kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Mirko Bonadei	92ea95e	2017-09-15 04:47:31	[diff] [blame]	11	#ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_
				12	#define API_AUDIO_CODECS_AUDIO_DECODER_H_
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	13
Yves Gerey	988cc08	2018-10-23 10:03:01	[diff] [blame]	14	#include <stddef.h>
				15	#include <stdint.h>
Jonas Olsson	a4d8737	2019-07-05 17:08:33	[diff] [blame]	16
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	17	#include <memory>
Florent Castelli	8037fc6	2024-08-29 13:00:40	[diff] [blame]	18	#include <optional>
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	19	#include <vector>
				20
Mirko Bonadei	92ea95e	2017-09-15 04:47:31	[diff] [blame]	21	#include "api/array_view.h"
Mirko Bonadei	92ea95e	2017-09-15 04:47:31	[diff] [blame]	22	#include "rtc_base/buffer.h"
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	23
				24	namespace webrtc {
				25
				26	class AudioDecoder {
				27	public:
				28	enum SpeechType {
				29	kSpeech = 1,
				30	kComfortNoise = 2,
				31	};
				32
				33	// Used by PacketDuration below. Save the value -1 for errors.
				34	enum { kNotImplemented = -2 };
				35
				36	AudioDecoder() = default;
				37	virtual ~AudioDecoder() = default;
				38
Byoungchan Lee	c065e73	2022-01-18 00:35:48	[diff] [blame]	39	AudioDecoder(const AudioDecoder&) = delete;
				40	AudioDecoder& operator=(const AudioDecoder&) = delete;
				41
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	42	class EncodedAudioFrame {
				43	public:
				44	struct DecodeResult {
				45	size_t num_decoded_samples;
				46	SpeechType speech_type;
				47	};
				48
				49	virtual ~EncodedAudioFrame() = default;
				50
				51	// Returns the duration in samples-per-channel of this audio frame.
				52	// If no duration can be ascertained, returns zero.
				53	virtual size_t Duration() const = 0;
				54
Ivo Creusen	c7f09ad	2018-05-22 11:21:01	[diff] [blame]	55	// Returns true if this packet contains DTX.
				56	virtual bool IsDtxPacket() const;
				57
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	58	// Decodes this frame of audio and writes the result in `decoded`.
				59	// `decoded` must be large enough to store as many samples as indicated by a
Florent Castelli	8037fc6	2024-08-29 13:00:40	[diff] [blame]	60	// call to Duration(). On success, returns an std::optional containing the
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	61	// total number of samples across all channels, as well as whether the
				62	// decoder produced comfort noise or speech. On failure, returns an empty
Florent Castelli	8037fc6	2024-08-29 13:00:40	[diff] [blame]	63	// std::optional. Decode may be called at most once per frame object.
				64	virtual std::optional<DecodeResult> Decode(
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	65	rtc::ArrayView<int16_t> decoded) const = 0;
				66	};
				67
				68	struct ParseResult {
				69	ParseResult();
				70	ParseResult(uint32_t timestamp,
				71	int priority,
				72	std::unique_ptr<EncodedAudioFrame> frame);
				73	ParseResult(ParseResult&& b);
				74	~ParseResult();
				75
				76	ParseResult& operator=(ParseResult&& b);
				77
				78	// The timestamp of the frame is in samples per channel.
				79	uint32_t timestamp;
				80	// The relative priority of the frame compared to other frames of the same
				81	// payload and the same timeframe. A higher value means a lower priority.
				82	// The highest priority is zero - negative values are not allowed.
				83	int priority;
				84	std::unique_ptr<EncodedAudioFrame> frame;
				85	};
				86
				87	// Let the decoder parse this payload and prepare zero or more decodable
				88	// frames. Each frame must be between 10 ms and 120 ms long. The caller must
				89	// ensure that the AudioDecoder object outlives any frame objects returned by
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	90	// this call. The decoder is free to swap or move the data from the `payload`
				91	// buffer. `timestamp` is the input timestamp, in samples, corresponding to
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	92	// the start of the payload.
				93	virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
				94	uint32_t timestamp);
				95
Niels Möller	b7180c0	2018-12-06 12:07:11	[diff] [blame]	96	// TODO(bugs.webrtc.org/10098): The Decode and DecodeRedundant methods are
				97	// obsolete; callers should call ParsePayload instead. For now, subclasses
				98	// must still implement DecodeInternal.
				99
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	100	// Decodes `encoded_len` bytes from `encoded` and writes the result in
				101	// `decoded`. The maximum number of bytes allowed to be written into `decoded`
				102	// is `max_decoded_bytes`. Returns the total number of samples across all
				103	// channels. If the decoder produced comfort noise, `speech_type`
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	104	// is set to kComfortNoise, otherwise it is kSpeech. The desired output
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	105	// sample rate is provided in `sample_rate_hz`, which must be valid for the
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	106	// codec at hand.
				107	int Decode(const uint8_t* encoded,
				108	size_t encoded_len,
				109	int sample_rate_hz,
				110	size_t max_decoded_bytes,
				111	int16_t* decoded,
				112	SpeechType* speech_type);
				113
				114	// Same as Decode(), but interfaces to the decoder's redundant decode function.
				115	// The default implementation simply calls the regular Decode() method.
				116	int DecodeRedundant(const uint8_t* encoded,
				117	size_t encoded_len,
				118	int sample_rate_hz,
				119	size_t max_decoded_bytes,
				120	int16_t* decoded,
				121	SpeechType* speech_type);
				122
				123	// Indicates if the decoder implements the DecodePlc method.
				124	virtual bool HasDecodePlc() const;
				125
				126	// Calls the packet-loss concealment of the decoder to update the state after
				127	// one or several lost packets. The caller has to make sure that the
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	128	// memory allocated in `decoded` can accommodate `num_frames` frames.
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	129	virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
				130
Henrik Lundin	00eb12a	2018-09-05 16:14:52	[diff] [blame]	131	// Asks the decoder to generate packet-loss concealment and append it to the
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	132	// end of `concealment_audio`. The concealment audio should be in
Henrik Lundin	00eb12a	2018-09-05 16:14:52	[diff] [blame]	133	// channel-interleaved format, with as many channels as the last decoded
				134	// packet produced. The implementation must produce at least
				135	// requested_samples_per_channel, or nothing at all. Producing nothing signals
				136	// the caller to conceal the loss by other means. If the implementation provides
				137	// concealment samples, it is also responsible for "stitching" it together
				138	// with the decoded audio on either side of the concealment.
				139	// Note: The default implementation of GeneratePlc will be deleted soon. All
				140	// implementations must provide their own, which can be as simple as a no-op.
Pablo Barrera González	ff0e01f	2021-02-10 09:38:50	[diff] [blame]	141	// TODO(bugs.webrtc.org/9676): Remove default implementation.
Henrik Lundin	00eb12a	2018-09-05 16:14:52	[diff] [blame]	142	virtual void GeneratePlc(size_t requested_samples_per_channel,
				143	rtc::BufferT<int16_t>* concealment_audio);
				144
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	145	// Resets the decoder state (empty buffers etc.).
				146	virtual void Reset() = 0;
				147
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	148	// Returns the last error code from the decoder.
				149	virtual int ErrorCode();
				150
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	151	// Returns the duration in samples-per-channel of the payload in `encoded`
				152	// which is `encoded_len` bytes long. Returns kNotImplemented if no duration
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	153	// estimate is available, or -1 in case of an error.
				154	virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
				155
				156	// Returns the duration in samples-per-channel of the redundant payload in
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	157	// `encoded` which is `encoded_len` bytes long. Returns kNotImplemented if no
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	158	// duration estimate is available, or -1 in case of an error.
				159	virtual int PacketDurationRedundant(const uint8_t* encoded,
				160	size_t encoded_len) const;
				161
				162	// Detects whether a packet has forward error correction. The packet is
Artem Titov	0e61fdd	2021-07-25 19:50:14	[diff] [blame]	163	// comprised of the samples in `encoded` which is `encoded_len` bytes long.
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	164	// Returns true if the packet has FEC and false otherwise.
				165	virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
				166
				167	// Returns the actual sample rate of the decoder's output. This value may not
				168	// change during the lifetime of the decoder.
				169	virtual int SampleRateHz() const = 0;
				170
				171	// The number of channels in the decoder's output. This value may not change
				172	// during the lifetime of the decoder.
				173	virtual size_t Channels() const = 0;
				174
Ivo Creusen	d823259	2021-11-16 15:11:28	[diff] [blame]	175	// The maximum number of audio channels supported by WebRTC decoders.
				176	static constexpr int kMaxNumberOfChannels = 24;
				177
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	178	protected:
				179	static SpeechType ConvertSpeechType(int16_t type);
				180
				181	virtual int DecodeInternal(const uint8_t* encoded,
				182	size_t encoded_len,
				183	int sample_rate_hz,
				184	int16_t* decoded,
				185	SpeechType* speech_type) = 0;
				186
				187	virtual int DecodeRedundantInternal(const uint8_t* encoded,
				188	size_t encoded_len,
				189	int sample_rate_hz,
				190	int16_t* decoded,
				191	SpeechType* speech_type);
kwiberg	087bd34	2017-02-10 16:15:44	[diff] [blame]	192	};
				193
				194	} // namespace webrtc
Mirko Bonadei	92ea95e	2017-09-15 04:47:31	[diff] [blame]	195	#endif // API_AUDIO_CODECS_AUDIO_DECODER_H_