/*
* Copyright (c) 2004 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_
#define MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_
#include <stddef.h>
#include <stdint.h>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "absl/functional/any_invocable.h"
#include "absl/strings/string_view.h"
#include "api/audio/audio_device.h"
#include "api/audio/audio_frame_processor.h"
#include "api/audio/audio_mixer.h"
#include "api/audio/audio_processing.h"
#include "api/audio_codecs/audio_codec_pair_id.h"
#include "api/audio_codecs/audio_decoder_factory.h"
#include "api/audio_codecs/audio_encoder_factory.h"
#include "api/audio_codecs/audio_format.h"
#include "api/audio_options.h"
#include "api/call/audio_sink.h"
#include "api/crypto/crypto_options.h"
#include "api/crypto/frame_decryptor_interface.h"
#include "api/crypto/frame_encryptor_interface.h"
#include "api/field_trials_view.h"
#include "api/frame_transformer_interface.h"
#include "api/media_types.h"
#include "api/rtc_error.h"
#include "api/rtp_headers.h"
#include "api/rtp_parameters.h"
#include "api/rtp_sender_interface.h"
#include "api/scoped_refptr.h"
#include "api/sequence_checker.h"
#include "api/task_queue/pending_task_safety_flag.h"
#include "api/task_queue/task_queue_base.h"
#include "api/task_queue/task_queue_factory.h"
#include "api/transport/rtp/rtp_source.h"
#include "call/audio_send_stream.h"
#include "call/audio_state.h"
#include "call/call.h"
#include "media/base/codec.h"
#include "media/base/media_channel.h"
#include "media/base/media_channel_impl.h"
#include "media/base/media_config.h"
#include "media/base/media_engine.h"
#include "media/base/stream_params.h"
#include "modules/async_audio_processing/async_audio_processing.h"
#include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
#include "modules/rtp_rtcp/source/rtp_packet_received.h"
#include "rtc_base/checks.h"
#include "rtc_base/network/sent_packet.h"
#include "rtc_base/network_route.h"
#include "rtc_base/system/file_wrapper.h"
namespace webrtc {
class AudioFrameProcessor;
}
namespace cricket {
class AudioSource;
// WebRtcVoiceEngine is a class to be used with CompositeMediaEngine.
// It uses the WebRtc VoiceEngine library for audio handling.
class WebRtcVoiceEngine final : public VoiceEngineInterface {
friend class WebRtcVoiceSendChannel;
friend class WebRtcVoiceReceiveChannel;
public:
WebRtcVoiceEngine(
webrtc::TaskQueueFactory* task_queue_factory,
webrtc::AudioDeviceModule* adm,
const rtc::scoped_refptr<webrtc::AudioEncoderFactory>& encoder_factory,
const rtc::scoped_refptr<webrtc::AudioDecoderFactory>& decoder_factory,
rtc::scoped_refptr<webrtc::AudioMixer> audio_mixer,
rtc::scoped_refptr<webrtc::AudioProcessing> audio_processing,
std::unique_ptr<webrtc::AudioFrameProcessor> owned_audio_frame_processor,
const webrtc::FieldTrialsView& trials);
WebRtcVoiceEngine() = delete;
WebRtcVoiceEngine(const WebRtcVoiceEngine&) = delete;
WebRtcVoiceEngine& operator=(const WebRtcVoiceEngine&) = delete;
~WebRtcVoiceEngine() override;
// Does initialization that needs to occur on the worker thread.
void Init() override;
rtc::scoped_refptr<webrtc::AudioState> GetAudioState() const override;
std::unique_ptr<VoiceMediaSendChannelInterface> CreateSendChannel(
webrtc::Call* call,
const MediaConfig& config,
const AudioOptions& options,
const webrtc::CryptoOptions& crypto_options,
webrtc::AudioCodecPairId codec_pair_id) override;
std::unique_ptr<VoiceMediaReceiveChannelInterface> CreateReceiveChannel(
webrtc::Call* call,
const MediaConfig& config,
const AudioOptions& options,
const webrtc::CryptoOptions& crypto_options,
webrtc::AudioCodecPairId codec_pair_id) override;
const std::vector<Codec>& send_codecs() const override;
const std::vector<Codec>& recv_codecs() const override;
std::vector<webrtc::RtpHeaderExtensionCapability> GetRtpHeaderExtensions()
const override;
// Starts AEC dump using an existing file. A maximum file size in bytes can be
// specified. When the maximum file size is reached, logging is stopped and
// the file is closed. If max_size_bytes is set to <= 0, no limit will be
// used.
bool StartAecDump(webrtc::FileWrapper file, int64_t max_size_bytes) override;
// Stops AEC dump.
void StopAecDump() override;
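// Example (illustrative sketch only; the dump path below is arbitrary):
//
//   webrtc::FileWrapper file =
//       webrtc::FileWrapper::OpenWriteOnly("/tmp/audio.aecdump");
//   if (file.is_open()) {
//     // max_size_bytes <= 0 means the dump is not size-limited.
//     engine->StartAecDump(std::move(file), /*max_size_bytes=*/-1);
//   }
//   ...
//   engine->StopAecDump();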
std::optional<webrtc::AudioDeviceModule::Stats> GetAudioDeviceStats()
override;
private:
// Every option that is "set" will be applied. Every option not "set" will be
// ignored. This allows us to selectively turn on and off different options
// easily at any time.
void ApplyOptions(const AudioOptions& options);
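// Example of the "set vs. unset" semantics (sketch; the option values are
// arbitrary):
//
//   AudioOptions options;
//   options.echo_cancellation = true;   // explicitly set -> applied
//   options.auto_gain_control = false;  // explicitly set -> applied
//   // options.noise_suppression is left unset -> current setting is kept.
//   ApplyOptions(options);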
webrtc::TaskQueueFactory* const task_queue_factory_;
std::unique_ptr<webrtc::TaskQueueBase, webrtc::TaskQueueDeleter>
low_priority_worker_queue_;
webrtc::AudioDeviceModule* adm();
webrtc::AudioProcessing* apm() const;
webrtc::AudioState* audio_state();
webrtc::SequenceChecker signal_thread_checker_{
webrtc::SequenceChecker::kDetached};
webrtc::SequenceChecker worker_thread_checker_{
webrtc::SequenceChecker::kDetached};
// The audio device module.
rtc::scoped_refptr<webrtc::AudioDeviceModule> adm_;
rtc::scoped_refptr<webrtc::AudioEncoderFactory> encoder_factory_;
rtc::scoped_refptr<webrtc::AudioDecoderFactory> decoder_factory_;
rtc::scoped_refptr<webrtc::AudioMixer> audio_mixer_;
// The audio processing module.
rtc::scoped_refptr<webrtc::AudioProcessing> apm_;
// Asynchronous audio processing.
std::unique_ptr<webrtc::AudioFrameProcessor> audio_frame_processor_;
// The primary instance of WebRtc VoiceEngine.
rtc::scoped_refptr<webrtc::AudioState> audio_state_;
std::vector<Codec> send_codecs_;
std::vector<Codec> recv_codecs_;
bool is_dumping_aec_ = false;
bool initialized_ = false;
// Jitter buffer settings for new streams.
size_t audio_jitter_buffer_max_packets_ = 200;
bool audio_jitter_buffer_fast_accelerate_ = false;
int audio_jitter_buffer_min_delay_ms_ = 0;
const bool minimized_remsampling_on_mobile_trial_enabled_;
};
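// Example wiring (illustrative sketch only; the builtin factory helpers named
// here are assumptions about the embedder's setup, and `task_queue_factory`,
// `adm`, `audio_processing`, `field_trials` and `call` are placeholders):
//
//   auto engine = std::make_unique<WebRtcVoiceEngine>(
//       task_queue_factory, adm,
//       webrtc::CreateBuiltinAudioEncoderFactory(),
//       webrtc::CreateBuiltinAudioDecoderFactory(),
//       /*audio_mixer=*/nullptr, audio_processing,
//       /*owned_audio_frame_processor=*/nullptr, field_trials);
//   engine->Init();  // Must run on the worker thread.
//
//   std::unique_ptr<VoiceMediaSendChannelInterface> send_channel =
//       engine->CreateSendChannel(call, MediaConfig(), AudioOptions(),
//                                 webrtc::CryptoOptions(),
//                                 webrtc::AudioCodecPairId::Create());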
class WebRtcVoiceSendChannel final : public MediaChannelUtil,
public VoiceMediaSendChannelInterface {
public:
WebRtcVoiceSendChannel(WebRtcVoiceEngine* engine,
const MediaConfig& config,
const AudioOptions& options,
const webrtc::CryptoOptions& crypto_options,
webrtc::Call* call,
webrtc::AudioCodecPairId codec_pair_id);
WebRtcVoiceSendChannel() = delete;
WebRtcVoiceSendChannel(const WebRtcVoiceSendChannel&) = delete;
WebRtcVoiceSendChannel& operator=(const WebRtcVoiceSendChannel&) = delete;
~WebRtcVoiceSendChannel() override;
MediaType media_type() const override { return MEDIA_TYPE_AUDIO; }
VideoMediaSendChannelInterface* AsVideoSendChannel() override {
RTC_CHECK_NOTREACHED();
return nullptr;
}
VoiceMediaSendChannelInterface* AsVoiceSendChannel() override { return this; }
std::optional<Codec> GetSendCodec() const override;
// Functions imported from MediaChannelUtil
void SetInterface(MediaChannelNetworkInterface* iface) override {
MediaChannelUtil::SetInterface(iface);
}
bool HasNetworkInterface() const override {
return MediaChannelUtil::HasNetworkInterface();
}
void SetExtmapAllowMixed(bool extmap_allow_mixed) override {
MediaChannelUtil::SetExtmapAllowMixed(extmap_allow_mixed);
}
bool ExtmapAllowMixed() const override {
return MediaChannelUtil::ExtmapAllowMixed();
}
const AudioOptions& options() const { return options_; }
bool SetSenderParameters(const AudioSenderParameter& params) override;
webrtc::RtpParameters GetRtpSendParameters(uint32_t ssrc) const override;
webrtc::RTCError SetRtpSendParameters(
uint32_t ssrc,
const webrtc::RtpParameters& parameters,
webrtc::SetParametersCallback callback) override;
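// Typical get-modify-set usage (sketch; `channel` and `ssrc` are placeholders
// and the bitrate value is arbitrary):
//
//   webrtc::RtpParameters p = channel->GetRtpSendParameters(ssrc);
//   if (!p.encodings.empty()) {
//     p.encodings[0].max_bitrate_bps = 32000;
//   }
//   channel->SetRtpSendParameters(ssrc, p, /*callback=*/nullptr);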
void SetSend(bool send) override;
bool SetAudioSend(uint32_t ssrc,
bool enable,
const AudioOptions* options,
AudioSource* source) override;
bool AddSendStream(const StreamParams& sp) override;
bool RemoveSendStream(uint32_t ssrc) override;
void SetSsrcListChangedCallback(
absl::AnyInvocable<void(const std::set<uint32_t>&)> callback) override;
// E2EE Frame API
// Sets a frame encryptor for a particular ssrc that will intercept all
// outgoing audio payload frames, attempt to encrypt them, and forward the
// result to the packetizer.
void SetFrameEncryptor(uint32_t ssrc,
rtc::scoped_refptr<webrtc::FrameEncryptorInterface>
frame_encryptor) override;
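// Example (sketch; MyFrameEncryptor is a hypothetical
// webrtc::FrameEncryptorInterface implementation provided by the caller):
//
//   channel->SetFrameEncryptor(ssrc,
//                              rtc::make_ref_counted<MyFrameEncryptor>());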
bool CanInsertDtmf() override;
bool InsertDtmf(uint32_t ssrc, int event, int duration) override;
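// Example (sketch; event 1 is the DTMF digit "1" per RFC 4733, duration is
// in milliseconds):
//
//   if (channel->CanInsertDtmf()) {
//     channel->InsertDtmf(ssrc, /*event=*/1, /*duration=*/160);
//   }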
void OnPacketSent(const rtc::SentPacket& sent_packet) override;
void OnNetworkRouteChanged(absl::string_view transport_name,
const rtc::NetworkRoute& network_route) override;
void OnReadyToSend(bool ready) override;
bool GetStats(VoiceMediaSendInfo* info) override;
// Sets a frame transformer between encoder and packetizer, to transform
// encoded frames before they are sent out on the network.
void SetEncoderToPacketizerFrameTransformer(
uint32_t ssrc,
rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
override;
bool SenderNackEnabled() const override {
if (!send_codec_spec_) {
return false;
}
return send_codec_spec_->nack_enabled;
}
bool SenderNonSenderRttEnabled() const override {
if (!send_codec_spec_) {
return false;
}
return send_codec_spec_->enable_non_sender_rtt;
}
bool SendCodecHasNack() const override { return SenderNackEnabled(); }
void SetSendCodecChangedCallback(
absl::AnyInvocable<void()> callback) override {
send_codec_changed_callback_ = std::move(callback);
}
private:
bool SetOptions(const AudioOptions& options);
bool SetSendCodecs(const std::vector<Codec>& codecs,
std::optional<Codec> preferred_codec);
bool SetLocalSource(uint32_t ssrc, AudioSource* source);
bool MuteStream(uint32_t ssrc, bool mute);
WebRtcVoiceEngine* engine() { return engine_; }
bool SetMaxSendBitrate(int bps);
void SetupRecording();
webrtc::TaskQueueBase* const worker_thread_;
webrtc::ScopedTaskSafety task_safety_;
webrtc::SequenceChecker network_thread_checker_{
webrtc::SequenceChecker::kDetached};
WebRtcVoiceEngine* const engine_ = nullptr;
std::vector<Codec> send_codecs_;
int max_send_bitrate_bps_ = 0;
AudioOptions options_;
std::optional<int> dtmf_payload_type_;
int dtmf_payload_freq_ = -1;
bool enable_non_sender_rtt_ = false;
bool send_ = false;
webrtc::Call* const call_ = nullptr;
const MediaConfig::Audio audio_config_;
class WebRtcAudioSendStream;
std::map<uint32_t, WebRtcAudioSendStream*> send_streams_;
std::vector<webrtc::RtpExtension> send_rtp_extensions_;
std::string mid_;
webrtc::RtcpMode rtcp_mode_;
std::optional<webrtc::AudioSendStream::Config::SendCodecSpec>
send_codec_spec_;
// TODO(kwiberg): Per-SSRC codec pair IDs?
const webrtc::AudioCodecPairId codec_pair_id_;
// Per peer connection crypto options that last for the lifetime of the peer
// connection.
const webrtc::CryptoOptions crypto_options_;
rtc::scoped_refptr<webrtc::FrameTransformerInterface>
unsignaled_frame_transformer_;
void FillSendCodecStats(VoiceMediaSendInfo* voice_media_info);
// Callback invoked whenever the send codec changes.
// TODO(bugs.webrtc.org/13931): Remove again when coupling isn't needed.
absl::AnyInvocable<void()> send_codec_changed_callback_;
// Callback invoked whenever the list of SSRCs changes.
absl::AnyInvocable<void(const std::set<uint32_t>&)>
ssrc_list_changed_callback_;
};
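// Typical send-side flow (illustrative sketch; `params`, `sp`, `options`,
// `source` and the SSRC are placeholders supplied by the caller):
//
//   send_channel->SetSenderParameters(params);  // negotiated codecs etc.
//   send_channel->AddSendStream(sp);            // sp.first_ssrc() == ssrc
//   send_channel->SetAudioSend(ssrc, /*enable=*/true, &options, source);
//   send_channel->SetSend(true);                // start sending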
class WebRtcVoiceReceiveChannel final
: public MediaChannelUtil,
public VoiceMediaReceiveChannelInterface {
public:
WebRtcVoiceReceiveChannel(WebRtcVoiceEngine* engine,
const MediaConfig& config,
const AudioOptions& options,
const webrtc::CryptoOptions& crypto_options,
webrtc::Call* call,
webrtc::AudioCodecPairId codec_pair_id);
WebRtcVoiceReceiveChannel() = delete;
WebRtcVoiceReceiveChannel(const WebRtcVoiceReceiveChannel&) = delete;
WebRtcVoiceReceiveChannel& operator=(const WebRtcVoiceReceiveChannel&) =
delete;
~WebRtcVoiceReceiveChannel() override;
MediaType media_type() const override { return MEDIA_TYPE_AUDIO; }
VideoMediaReceiveChannelInterface* AsVideoReceiveChannel() override {
RTC_CHECK_NOTREACHED();
return nullptr;
}
VoiceMediaReceiveChannelInterface* AsVoiceReceiveChannel() override {
return this;
}
const AudioOptions& options() const { return options_; }
void SetInterface(MediaChannelNetworkInterface* iface) override {
MediaChannelUtil::SetInterface(iface);
}
bool SetReceiverParameters(const AudioReceiverParameters& params) override;
webrtc::RtpParameters GetRtpReceiverParameters(uint32_t ssrc) const override;
webrtc::RtpParameters GetDefaultRtpReceiveParameters() const override;
void SetPlayout(bool playout) override;
bool AddRecvStream(const StreamParams& sp) override;
bool RemoveRecvStream(uint32_t ssrc) override;
void ResetUnsignaledRecvStream() override;
std::optional<uint32_t> GetUnsignaledSsrc() const override;
void ChooseReceiverReportSsrc(const std::set<uint32_t>& choices) override;
void OnDemuxerCriteriaUpdatePending() override;
void OnDemuxerCriteriaUpdateComplete() override;
// E2EE Frame API
// Sets a frame decryptor for a particular ssrc that will intercept all
// incoming audio payloads, attempt to decrypt them, and forward the
// result.
void SetFrameDecryptor(uint32_t ssrc,
rtc::scoped_refptr<webrtc::FrameDecryptorInterface>
frame_decryptor) override;
bool SetOutputVolume(uint32_t ssrc, double volume) override;
// Applies the new volume to current and future unsignaled streams.
bool SetDefaultOutputVolume(double volume) override;
bool SetBaseMinimumPlayoutDelayMs(uint32_t ssrc, int delay_ms) override;
std::optional<int> GetBaseMinimumPlayoutDelayMs(uint32_t ssrc) const override;
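// Example (sketch; 200 ms is an arbitrary target delay):
//
//   channel->SetBaseMinimumPlayoutDelayMs(ssrc, 200);
//   std::optional<int> delay = channel->GetBaseMinimumPlayoutDelayMs(ssrc);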
void OnPacketReceived(const webrtc::RtpPacketReceived& packet) override;
bool GetStats(VoiceMediaReceiveInfo* info,
bool get_and_clear_legacy_stats) override;
// Set the audio sink for an existing stream.
void SetRawAudioSink(
uint32_t ssrc,
std::unique_ptr<webrtc::AudioSinkInterface> sink) override;
// Sets the audio sink on the latest unsignaled stream, whether current or
// future. Only one stream at a time will use the sink.
void SetDefaultRawAudioSink(
std::unique_ptr<webrtc::AudioSinkInterface> sink) override;
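// Example (sketch; MySink is a hypothetical webrtc::AudioSinkInterface
// implementation that consumes decoded audio):
//
//   channel->SetRawAudioSink(ssrc, std::make_unique<MySink>());
//   // Or, for the latest stream that has not been signaled yet:
//   channel->SetDefaultRawAudioSink(std::make_unique<MySink>());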
std::vector<webrtc::RtpSource> GetSources(uint32_t ssrc) const override;
void SetDepacketizerToDecoderFrameTransformer(
uint32_t ssrc,
rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
override;
void SetReceiveNackEnabled(bool enabled) override;
void SetRtcpMode(webrtc::RtcpMode mode) override;
void SetReceiveNonSenderRttEnabled(bool enabled) override;
private:
bool SetOptions(const AudioOptions& options);
bool SetRecvCodecs(const std::vector<Codec>& codecs);
bool SetLocalSource(uint32_t ssrc, AudioSource* source);
bool MuteStream(uint32_t ssrc, bool mute);
WebRtcVoiceEngine* engine() { return engine_; }
void SetupRecording();
// Expected to be invoked once per packet that belongs to this channel but
// cannot be demuxed. Returns true if a default receive stream has been
// created.
bool MaybeCreateDefaultReceiveStream(const webrtc::RtpPacketReceived& packet);
// Checks if 'ssrc' is an unsignaled stream, and if so marks it as no longer
// unsignaled (i.e. it is now removed or signaled) and returns true.
bool MaybeDeregisterUnsignaledRecvStream(uint32_t ssrc);
webrtc::TaskQueueBase* const worker_thread_;
webrtc::ScopedTaskSafety task_safety_;
webrtc::SequenceChecker network_thread_checker_{
webrtc::SequenceChecker::kDetached};
WebRtcVoiceEngine* const engine_ = nullptr;
// TODO(kwiberg): decoder_map_ and recv_codecs_ store the exact same
// information, in slightly different formats. Eliminate recv_codecs_.
std::map<int, webrtc::SdpAudioFormat> decoder_map_;
std::vector<Codec> recv_codecs_;
AudioOptions options_;
bool recv_nack_enabled_ = false;
webrtc::RtcpMode recv_rtcp_mode_ = webrtc::RtcpMode::kCompound;
bool enable_non_sender_rtt_ = false;
bool playout_ = false;
webrtc::Call* const call_ = nullptr;
const MediaConfig::Audio audio_config_;
// Queue of unsignaled SSRCs; oldest at the beginning.
std::vector<uint32_t> unsignaled_recv_ssrcs_;
// A stream param that came from the remote description but wasn't signaled
// with any a=ssrc lines. It holds the information that was signaled before
// the unsignaled receive stream is created, which happens when the first
// packet is received.
StreamParams unsignaled_stream_params_;
// Volume for unsignaled streams, which may be set before the stream exists.
double default_recv_volume_ = 1.0;
// Delay for unsignaled streams, which may be set before the stream exists.
int default_recv_base_minimum_delay_ms_ = 0;
// Sink for latest unsignaled stream - may be set before the stream exists.
std::unique_ptr<webrtc::AudioSinkInterface> default_sink_;
// Default SSRC to use for RTCP receiver reports in case of no signaled
// send streams. See: https://code.google.com/p/webrtc/issues/detail?id=4740
// and https://code.google.com/p/chromium/issues/detail?id=547661
uint32_t receiver_reports_ssrc_ = 0xFA17FA17u;
std::string mid_;
class WebRtcAudioReceiveStream;
std::map<uint32_t, WebRtcAudioReceiveStream*> recv_streams_;
std::vector<webrtc::RtpExtension> recv_rtp_extensions_;
webrtc::RtpHeaderExtensionMap recv_rtp_extension_map_;
std::optional<webrtc::AudioSendStream::Config::SendCodecSpec>
send_codec_spec_;
// TODO(kwiberg): Per-SSRC codec pair IDs?
const webrtc::AudioCodecPairId codec_pair_id_;
// Per peer connection crypto options that last for the lifetime of the peer
// connection.
const webrtc::CryptoOptions crypto_options_;
// Unsignaled streams have an option to have a frame decryptor set on them.
rtc::scoped_refptr<webrtc::FrameDecryptorInterface>
unsignaled_frame_decryptor_;
rtc::scoped_refptr<webrtc::FrameTransformerInterface>
unsignaled_frame_transformer_;
void FillReceiveCodecStats(VoiceMediaReceiveInfo* voice_media_info);
};
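// Typical receive-side flow (illustrative sketch; `params`, `sp` and the SSRC
// are placeholders supplied by the caller):
//
//   recv_channel->SetReceiverParameters(params);  // negotiated codecs etc.
//   recv_channel->AddRecvStream(sp);
//   recv_channel->SetOutputVolume(ssrc, 1.0);
//   recv_channel->SetPlayout(true);               // start rendering audio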
} // namespace cricket
#endif // MEDIA_ENGINE_WEBRTC_VOICE_ENGINE_H_