webrtc/modules/audio_coding/main/acm2/acm_receiver.h - src - Git at Google

 /*
  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_

 #include <vector>

 #include "webrtc/base/thread_annotations.h"
 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
 #include "webrtc/engine_configurations.h"
 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
 #include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
 #include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
 #include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
 #include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
 #include "webrtc/modules/interface/module_common_types.h"
 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
 #include "webrtc/typedefs.h"

 namespace webrtc {

 struct CodecInst;
 class CriticalSectionWrapper;
 class NetEq;

 namespace acm2 {

 class Nack;

 class AcmReceiver {
  public:
   struct Decoder {
     bool registered;
     uint8_t payload_type;
     // This field is meaningful for codecs where both mono and
     // stereo versions are registered under the same ID.
     int channels;
   };

   // Constructor of the class
   explicit AcmReceiver(const AudioCodingModule::Config& config);

   // Destructor of the class.
   ~AcmReceiver();

   //
   // Inserts a payload with its associated RTP-header into NetEq.
   //
   // Input:
   //   - rtp_header           : RTP header for the incoming payload containing
   //                            information about payload type, sequence number,
   //                            timestamp, SSRC and marker bit.
   //   - incoming_payload     : Incoming audio payload.
   //   - length_payload       : Length of incoming audio payload in bytes.
   //
   // Return value             : 0 if OK.
   //                           <0 if NetEq returned an error.
   //
   int InsertPacket(const WebRtcRTPHeader& rtp_header,
                    const uint8_t* incoming_payload,
                    size_t length_payload);

   //
   // Asks NetEq for 10 milliseconds of decoded audio.
   //
   // Input:
   //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
   //                            audio. If set -1 indicates to resampling is
   //                            is required and the audio returned at the
   //                            sampling rate of the decoder.
   //
   // Output:
   //   -audio_frame           : an audio frame were output data and
   //                            associated parameters are written to.
   //
   // Return value             : 0 if OK.
   //                           -1 if NetEq returned an error.
   //
   int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);

   //
   // Adds a new codec to the NetEq codec database.
   //
   // Input:
   //   - acm_codec_id        : ACM codec ID.
   //   - payload_type        : payload type.
   //   - audio_decoder       : pointer to a decoder object. If it is NULL
   //                           then NetEq will internally create the decoder
   //                           object. Otherwise, NetEq will store this pointer
   //                           as the decoder corresponding with the given
   //                           payload type. NetEq won't acquire the ownership
   //                           of this pointer. It is up to the client of this
   //                           class (ACM) to delete it. By providing
   //                           |audio_decoder| ACM will have control over the
   //                           decoder instance of the codec. This is essential
   //                           for a codec like iSAC which encoder/decoder
   //                           encoder has to know about decoder (bandwidth
   //                           estimator that is updated at decoding time).
   //
   // Return value             : 0 if OK.
   //                           <0 if NetEq returned an error.
   //
   int AddCodec(int acm_codec_id,
                uint8_t payload_type,
                int channels,
                AudioDecoder* audio_decoder);

   //
   // Sets a minimum delay for packet buffer. The given delay is maintained,
   // unless channel condition dictates a higher delay.
   //
   // Input:
   //   - delay_ms             : minimum delay in milliseconds.
   //
   // Return value             : 0 if OK.
   //                           <0 if NetEq returned an error.
   //
   int SetMinimumDelay(int delay_ms);

   //
   // Sets a maximum delay [ms] for the packet buffer. The target delay does not
   // exceed the given value, even if channel condition requires so.
   //
   // Input:
   //   - delay_ms             : maximum delay in milliseconds.
   //
   // Return value             : 0 if OK.
   //                           <0 if NetEq returned an error.
   //
   int SetMaximumDelay(int delay_ms);

   //
   // Get least required delay computed based on channel conditions. Note that
   // this is before applying any user-defined limits (specified by calling
   // (SetMinimumDelay() and/or SetMaximumDelay()).
   //
   int LeastRequiredDelayMs() const;

   //
   // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout
   // delay. Silence (zero signal) is played out until equivalent of |delay_ms|
   // millisecond of audio is buffered. Then, NetEq maintains the delay.
   //
   // Input:
   //   - delay_ms             : initial delay in milliseconds.
   //
   // Return value             : 0 if OK.
   //                           <0 if NetEq returned an error.
   //
   int SetInitialDelay(int delay_ms);

   //
   // Resets the initial delay to zero.
   //
   void ResetInitialDelay();

   //
   // Get the current sampling frequency in Hz.
   //
   // Return value             : Sampling frequency in Hz.
   //
   int current_sample_rate_hz() const;

   //
   // Sets the playout mode.
   //
   // Input:
   //   - mode                 : an enumerator specifying the playout mode.
   //
   void SetPlayoutMode(AudioPlayoutMode mode);

   //
   // Get the current playout mode.
   //
   // Return value             : The current playout mode.
   //
   AudioPlayoutMode PlayoutMode() const;

   //
   // Get the current network statistics from NetEq.
   //
   // Output:
   //   - statistics           : The current network statistics.
   //
   void NetworkStatistics(ACMNetworkStatistics* statistics);

   //
   // Enable post-decoding VAD.
   //
   void EnableVad();

   //
   // Disable post-decoding VAD.
   //
   void DisableVad();

   //
   // Returns whether post-decoding VAD is enabled (true) or disabled (false).
   //
   bool vad_enabled() const { return vad_enabled_; }

   //
   // Flushes the NetEq packet and speech buffers.
   //
   void FlushBuffers();

   //
   // Removes a payload-type from the NetEq codec database.
   //
   // Input:
   //   - payload_type         : the payload-type to be removed.
   //
   // Return value             : 0 if OK.
   //                           -1 if an error occurred.
   //
   int RemoveCodec(uint8_t payload_type);

   //
   // Remove all registered codecs.
   //
   int RemoveAllCodecs();

   //
   // Set ID.
   //
   void set_id(int id);  // TODO(turajs): can be inline.

   //
   // Gets the RTP timestamp of the last sample delivered by GetAudio().
   // Returns true if the RTP timestamp is valid, otherwise false.
   //
   bool GetPlayoutTimestamp(uint32_t* timestamp);

   //
   // Return the index of the codec associated with the last non-CNG/non-DTMF
   // received payload. If no non-CNG/non-DTMF payload is received -1 is
   // returned.
   //
   int last_audio_codec_id() const;  // TODO(turajs): can be inline.

   //
   // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
   // non-CNG/non-DTMF packet is received -1 is returned.
   //
   int last_audio_payload_type() const;  // TODO(turajs): can be inline.

   //
   // Get the audio codec associated with the last non-CNG/non-DTMF received
   // payload. If no non-CNG/non-DTMF packet is received -1 is returned,
   // otherwise return 0.
   //
   int LastAudioCodec(CodecInst* codec) const;

   //
   // Return payload type of RED if it is registered, otherwise return -1;
   //
   int RedPayloadType() const;

   //
   // Get a decoder given its registered payload-type.
   //
   // Input:
   //    -payload_type         : the payload-type of the codec to be retrieved.
   //
   // Output:
   //    -codec                : codec associated with the given payload-type.
   //
   // Return value             : 0 if succeeded.
   //                           -1 if failed, e.g. given payload-type is not
   //                              registered.
   //
   int DecoderByPayloadType(uint8_t payload_type,
                            CodecInst* codec) const;

   //
   // Enable NACK and set the maximum size of the NACK list. If NACK is already
   // enabled then the maximum NACK list size is modified accordingly.
   //
   // Input:
   //    -max_nack_list_size  : maximum NACK list size
   //                           should be positive (none zero) and less than or
   //                           equal to |Nack::kNackListSizeLimit|
   // Return value
   //                         : 0 if succeeded.
   //                          -1 if failed
   //
   int EnableNack(size_t max_nack_list_size);

   // Disable NACK.
   void DisableNack();

   //
   // Get a list of packets to be retransmitted.
   //
   // Input:
   //    -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
   // Return value           : list of packets to be retransmitted.
   //
   std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const;

   //
   // Get statistics of calls to GetAudio().
   void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;

  private:
   int PayloadType2CodecIndex(uint8_t payload_type) const;

   bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame)
       EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

   int GetNumSyncPacketToInsert(uint16_t received_squence_number);

   int RtpHeaderToCodecIndex(
       const RTPHeader& rtp_header, const uint8_t* payload) const;

   uint32_t NowInTimestamp(int decoder_sampling_rate) const;

   void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);

   scoped_ptr<CriticalSectionWrapper> crit_sect_;
   int id_;  // TODO(henrik.lundin) Make const.
   int last_audio_decoder_ GUARDED_BY(crit_sect_);
   AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
   int current_sample_rate_hz_ GUARDED_BY(crit_sect_);
   ACMResampler resampler_ GUARDED_BY(crit_sect_);
   // Used in GetAudio, declared as member to avoid allocating every 10ms.
   // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
   scoped_ptr<int16_t[]> audio_buffer_ GUARDED_BY(crit_sect_);
   scoped_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
   scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
   bool nack_enabled_ GUARDED_BY(crit_sect_);
   CallStatistics call_stats_ GUARDED_BY(crit_sect_);
   NetEq* neteq_;
   Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
   bool vad_enabled_;
   Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
   bool resampled_last_output_frame_ GUARDED_BY(crit_sect_);

   // Indicates if a non-zero initial delay is set, and the receiver is in
   // AV-sync mode.
   bool av_sync_;
   scoped_ptr<InitialDelayManager> initial_delay_manager_;

   // The following are defined as members to avoid creating them in every
   // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket().
   // |late_packets_sync_stream_| is only used in GetAudio(). Both of these
   // member variables are allocated only when we AV-sync is enabled, i.e.
   // initial delay is set.
   scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
   scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
 };

 }  // namespace acm2

 }  // namespace webrtc

 #endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
	/*
	* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
	#define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_

	#include <vector>

	#include "webrtc/base/thread_annotations.h"
	#include "webrtc/common_audio/vad/include/webrtc_vad.h"
	#include "webrtc/engine_configurations.h"
	#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
	#include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
	#include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
	#include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
	#include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
	#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
	#include "webrtc/modules/interface/module_common_types.h"
	#include "webrtc/system_wrappers/interface/scoped_ptr.h"
	#include "webrtc/typedefs.h"

	namespace webrtc {

	struct CodecInst;
	class CriticalSectionWrapper;
	class NetEq;

	namespace acm2 {

	class Nack;

	class AcmReceiver {
	public:
	struct Decoder {
	bool registered;
	uint8_t payload_type;
	// This field is meaningful for codecs where both mono and
	// stereo versions are registered under the same ID.
	int channels;
	};

	// Constructor of the class
	explicit AcmReceiver(const AudioCodingModule::Config& config);

	// Destructor of the class.
	~AcmReceiver();

	//
	// Inserts a payload with its associated RTP-header into NetEq.
	//
	// Input:
	// - rtp_header : RTP header for the incoming payload containing
	// information about payload type, sequence number,
	// timestamp, SSRC and marker bit.
	// - incoming_payload : Incoming audio payload.
	// - length_payload : Length of incoming audio payload in bytes.
	//
	// Return value : 0 if OK.
	// <0 if NetEq returned an error.
	//
	int InsertPacket(const WebRtcRTPHeader& rtp_header,
	const uint8_t* incoming_payload,
	size_t length_payload);

	//
	// Asks NetEq for 10 milliseconds of decoded audio.
	//
	// Input:
	// -desired_freq_hz : specifies the sampling rate [Hz] of the output
	// audio. If set -1 indicates to resampling is
	// is required and the audio returned at the
	// sampling rate of the decoder.
	//
	// Output:
	// -audio_frame : an audio frame were output data and
	// associated parameters are written to.
	//
	// Return value : 0 if OK.
	// -1 if NetEq returned an error.
	//
	int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);

	//
	// Adds a new codec to the NetEq codec database.
	//
	// Input:
	// - acm_codec_id : ACM codec ID.
	// - payload_type : payload type.
	// - audio_decoder : pointer to a decoder object. If it is NULL
	// then NetEq will internally create the decoder
	// object. Otherwise, NetEq will store this pointer
	// as the decoder corresponding with the given
	// payload type. NetEq won't acquire the ownership
	// of this pointer. It is up to the client of this
	// class (ACM) to delete it. By providing
	// \|audio_decoder\| ACM will have control over the
	// decoder instance of the codec. This is essential
	// for a codec like iSAC which encoder/decoder
	// encoder has to know about decoder (bandwidth
	// estimator that is updated at decoding time).
	//
	// Return value : 0 if OK.
	// <0 if NetEq returned an error.
	//
	int AddCodec(int acm_codec_id,
	uint8_t payload_type,
	int channels,
	AudioDecoder* audio_decoder);

	//
	// Sets a minimum delay for packet buffer. The given delay is maintained,
	// unless channel condition dictates a higher delay.
	//
	// Input:
	// - delay_ms : minimum delay in milliseconds.
	//
	// Return value : 0 if OK.
	// <0 if NetEq returned an error.
	//
	int SetMinimumDelay(int delay_ms);

	//
	// Sets a maximum delay [ms] for the packet buffer. The target delay does not
	// exceed the given value, even if channel condition requires so.
	//
	// Input:
	// - delay_ms : maximum delay in milliseconds.
	//
	// Return value : 0 if OK.
	// <0 if NetEq returned an error.
	//
	int SetMaximumDelay(int delay_ms);

	//
	// Get least required delay computed based on channel conditions. Note that
	// this is before applying any user-defined limits (specified by calling
	// (SetMinimumDelay() and/or SetMaximumDelay()).
	//
	int LeastRequiredDelayMs() const;

	//
	// Sets an initial delay of \|delay_ms\| milliseconds. This introduces a playout
	// delay. Silence (zero signal) is played out until equivalent of \|delay_ms\|
	// millisecond of audio is buffered. Then, NetEq maintains the delay.
	//
	// Input:
	// - delay_ms : initial delay in milliseconds.
	//
	// Return value : 0 if OK.
	// <0 if NetEq returned an error.
	//
	int SetInitialDelay(int delay_ms);

	//
	// Resets the initial delay to zero.
	//
	void ResetInitialDelay();

	//
	// Get the current sampling frequency in Hz.
	//
	// Return value : Sampling frequency in Hz.
	//
	int current_sample_rate_hz() const;

	//
	// Sets the playout mode.
	//
	// Input:
	// - mode : an enumerator specifying the playout mode.
	//
	void SetPlayoutMode(AudioPlayoutMode mode);

	//
	// Get the current playout mode.
	//
	// Return value : The current playout mode.
	//
	AudioPlayoutMode PlayoutMode() const;

	//
	// Get the current network statistics from NetEq.
	//
	// Output:
	// - statistics : The current network statistics.
	//
	void NetworkStatistics(ACMNetworkStatistics* statistics);

	//
	// Enable post-decoding VAD.
	//
	void EnableVad();

	//
	// Disable post-decoding VAD.
	//
	void DisableVad();

	//
	// Returns whether post-decoding VAD is enabled (true) or disabled (false).
	//
	bool vad_enabled() const { return vad_enabled_; }

	//
	// Flushes the NetEq packet and speech buffers.
	//
	void FlushBuffers();

	//
	// Removes a payload-type from the NetEq codec database.
	//
	// Input:
	// - payload_type : the payload-type to be removed.
	//
	// Return value : 0 if OK.
	// -1 if an error occurred.
	//
	int RemoveCodec(uint8_t payload_type);

	//
	// Remove all registered codecs.
	//
	int RemoveAllCodecs();

	//
	// Set ID.
	//
	void set_id(int id); // TODO(turajs): can be inline.

	//
	// Gets the RTP timestamp of the last sample delivered by GetAudio().
	// Returns true if the RTP timestamp is valid, otherwise false.
	//
	bool GetPlayoutTimestamp(uint32_t* timestamp);

	//
	// Return the index of the codec associated with the last non-CNG/non-DTMF
	// received payload. If no non-CNG/non-DTMF payload is received -1 is
	// returned.
	//
	int last_audio_codec_id() const; // TODO(turajs): can be inline.

	//
	// Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
	// non-CNG/non-DTMF packet is received -1 is returned.
	//
	int last_audio_payload_type() const; // TODO(turajs): can be inline.

	//
	// Get the audio codec associated with the last non-CNG/non-DTMF received
	// payload. If no non-CNG/non-DTMF packet is received -1 is returned,
	// otherwise return 0.
	//
	int LastAudioCodec(CodecInst* codec) const;

	//
	// Return payload type of RED if it is registered, otherwise return -1;
	//
	int RedPayloadType() const;

	//
	// Get a decoder given its registered payload-type.
	//
	// Input:
	// -payload_type : the payload-type of the codec to be retrieved.
	//
	// Output:
	// -codec : codec associated with the given payload-type.
	//
	// Return value : 0 if succeeded.
	// -1 if failed, e.g. given payload-type is not
	// registered.
	//
	int DecoderByPayloadType(uint8_t payload_type,
	CodecInst* codec) const;

	//
	// Enable NACK and set the maximum size of the NACK list. If NACK is already
	// enabled then the maximum NACK list size is modified accordingly.
	//
	// Input:
	// -max_nack_list_size : maximum NACK list size
	// should be positive (none zero) and less than or
	// equal to \|Nack::kNackListSizeLimit\|
	// Return value
	// : 0 if succeeded.
	// -1 if failed
	//
	int EnableNack(size_t max_nack_list_size);

	// Disable NACK.
	void DisableNack();

	//
	// Get a list of packets to be retransmitted.
	//
	// Input:
	// -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
	// Return value : list of packets to be retransmitted.
	//
	std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const;

	//
	// Get statistics of calls to GetAudio().
	void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;

	private:
	int PayloadType2CodecIndex(uint8_t payload_type) const;

	bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame)
	EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

	int GetNumSyncPacketToInsert(uint16_t received_squence_number);

	int RtpHeaderToCodecIndex(
	const RTPHeader& rtp_header, const uint8_t* payload) const;

	uint32_t NowInTimestamp(int decoder_sampling_rate) const;

	void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);

	scoped_ptr<CriticalSectionWrapper> crit_sect_;
	int id_; // TODO(henrik.lundin) Make const.
	int last_audio_decoder_ GUARDED_BY(crit_sect_);
	AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
	int current_sample_rate_hz_ GUARDED_BY(crit_sect_);
	ACMResampler resampler_ GUARDED_BY(crit_sect_);
	// Used in GetAudio, declared as member to avoid allocating every 10ms.
	// TODO(henrik.lundin) Stack-allocate in GetAudio instead?
	scoped_ptr<int16_t[]> audio_buffer_ GUARDED_BY(crit_sect_);
	scoped_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
	scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
	bool nack_enabled_ GUARDED_BY(crit_sect_);
	CallStatistics call_stats_ GUARDED_BY(crit_sect_);
	NetEq* neteq_;
	Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
	bool vad_enabled_;
	Clock* clock_; // TODO(henrik.lundin) Make const if possible.
	bool resampled_last_output_frame_ GUARDED_BY(crit_sect_);

	// Indicates if a non-zero initial delay is set, and the receiver is in
	// AV-sync mode.
	bool av_sync_;
	scoped_ptr<InitialDelayManager> initial_delay_manager_;

	// The following are defined as members to avoid creating them in every
	// iteration. \|missing_packets_sync_stream_\| is ONLY used in InsertPacket().
	// \|late_packets_sync_stream_\| is only used in GetAudio(). Both of these
	// member variables are allocated only when we AV-sync is enabled, i.e.
	// initial delay is set.
	scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
	scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
	};

	} // namespace acm2

	} // namespace webrtc

	#endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_