| /* |
| * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/audio_coding/acm2/acm_receiver.h" |
| |
| #include <algorithm> // std::min |
| #include <memory> |
| |
| #include "api/audio_codecs/builtin_audio_decoder_factory.h" |
| #include "api/audio_codecs/builtin_audio_encoder_factory.h" |
| #include "modules/audio_coding/codecs/cng/audio_encoder_cng.h" |
| #include "modules/audio_coding/include/audio_coding_module.h" |
| #include "modules/audio_coding/neteq/tools/rtp_generator.h" |
| #include "modules/include/module_common_types.h" |
| #include "rtc_base/checks.h" |
| #include "rtc_base/numerics/safe_conversions.h" |
| #include "system_wrappers/include/clock.h" |
| #include "test/gtest.h" |
| #include "test/testsupport/file_utils.h" |
| |
| namespace webrtc { |
| |
| namespace acm2 { |
| |
| class AcmReceiverTestOldApi : public AudioPacketizationCallback, |
| public ::testing::Test { |
| protected: |
| AcmReceiverTestOldApi() |
| : timestamp_(0), |
| packet_sent_(false), |
| last_packet_send_timestamp_(timestamp_), |
| last_frame_type_(AudioFrameType::kEmptyFrame) { |
| config_.decoder_factory = decoder_factory_; |
| } |
| |
| ~AcmReceiverTestOldApi() {} |
| |
| void SetUp() override { |
| acm_.reset(AudioCodingModule::Create(config_)); |
| receiver_.reset(new AcmReceiver(config_)); |
| ASSERT_TRUE(receiver_.get() != NULL); |
| ASSERT_TRUE(acm_.get() != NULL); |
| acm_->InitializeReceiver(); |
| acm_->RegisterTransportCallback(this); |
| |
| rtp_header_.sequenceNumber = 0; |
| rtp_header_.timestamp = 0; |
| rtp_header_.markerBit = false; |
| rtp_header_.ssrc = 0x12345678; // Arbitrary. |
| rtp_header_.numCSRCs = 0; |
| rtp_header_.payloadType = 0; |
| } |
| |
| void TearDown() override {} |
| |
| AudioCodecInfo SetEncoder(int payload_type, |
| const SdpAudioFormat& format, |
| const std::map<int, int> cng_payload_types = {}) { |
| // Create the speech encoder. |
| AudioCodecInfo info = encoder_factory_->QueryAudioEncoder(format).value(); |
| std::unique_ptr<AudioEncoder> enc = |
| encoder_factory_->MakeAudioEncoder(payload_type, format, absl::nullopt); |
| |
| // If we have a compatible CN specification, stack a CNG on top. |
| auto it = cng_payload_types.find(info.sample_rate_hz); |
| if (it != cng_payload_types.end()) { |
| AudioEncoderCngConfig config; |
| config.speech_encoder = std::move(enc); |
| config.num_channels = 1; |
| config.payload_type = it->second; |
| config.vad_mode = Vad::kVadNormal; |
| enc = CreateComfortNoiseEncoder(std::move(config)); |
| } |
| |
| // Actually start using the new encoder. |
| acm_->SetEncoder(std::move(enc)); |
| return info; |
| } |
| |
| int InsertOnePacketOfSilence(const AudioCodecInfo& info) { |
| // Frame setup according to the codec. |
| AudioFrame frame; |
| frame.sample_rate_hz_ = info.sample_rate_hz; |
| frame.samples_per_channel_ = info.sample_rate_hz / 100; // 10 ms. |
| frame.num_channels_ = info.num_channels; |
| frame.Mute(); |
| packet_sent_ = false; |
| last_packet_send_timestamp_ = timestamp_; |
| int num_10ms_frames = 0; |
| while (!packet_sent_) { |
| frame.timestamp_ = timestamp_; |
| timestamp_ += rtc::checked_cast<uint32_t>(frame.samples_per_channel_); |
| EXPECT_GE(acm_->Add10MsData(frame), 0); |
| ++num_10ms_frames; |
| } |
| return num_10ms_frames; |
| } |
| |
| int SendData(AudioFrameType frame_type, |
| uint8_t payload_type, |
| uint32_t timestamp, |
| const uint8_t* payload_data, |
| size_t payload_len_bytes, |
| int64_t absolute_capture_timestamp_ms) override { |
| if (frame_type == AudioFrameType::kEmptyFrame) |
| return 0; |
| |
| rtp_header_.payloadType = payload_type; |
| rtp_header_.timestamp = timestamp; |
| |
| int ret_val = receiver_->InsertPacket( |
| rtp_header_, |
| rtc::ArrayView<const uint8_t>(payload_data, payload_len_bytes)); |
| if (ret_val < 0) { |
| assert(false); |
| return -1; |
| } |
| rtp_header_.sequenceNumber++; |
| packet_sent_ = true; |
| last_frame_type_ = frame_type; |
| return 0; |
| } |
| |
| const rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_ = |
| CreateBuiltinAudioEncoderFactory(); |
| const rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_ = |
| CreateBuiltinAudioDecoderFactory(); |
| AudioCodingModule::Config config_; |
| std::unique_ptr<AcmReceiver> receiver_; |
| std::unique_ptr<AudioCodingModule> acm_; |
| RTPHeader rtp_header_; |
| uint32_t timestamp_; |
| bool packet_sent_; // Set when SendData is called reset when inserting audio. |
| uint32_t last_packet_send_timestamp_; |
| AudioFrameType last_frame_type_; |
| }; |
| |
| #if defined(WEBRTC_ANDROID) |
| #define MAYBE_SampleRate DISABLED_SampleRate |
| #else |
| #define MAYBE_SampleRate SampleRate |
| #endif |
| TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) { |
| const std::map<int, SdpAudioFormat> codecs = {{0, {"ISAC", 16000, 1}}, |
| {1, {"ISAC", 32000, 1}}}; |
| receiver_->SetCodecs(codecs); |
| |
| constexpr int kOutSampleRateHz = 8000; // Different than codec sample rate. |
| for (size_t i = 0; i < codecs.size(); ++i) { |
| const int payload_type = rtc::checked_cast<int>(i); |
| const int num_10ms_frames = |
| InsertOnePacketOfSilence(SetEncoder(payload_type, codecs.at(i))); |
| for (int k = 0; k < num_10ms_frames; ++k) { |
| AudioFrame frame; |
| bool muted; |
| EXPECT_EQ(0, receiver_->GetAudio(kOutSampleRateHz, &frame, &muted)); |
| } |
| EXPECT_EQ(encoder_factory_->QueryAudioEncoder(codecs.at(i))->sample_rate_hz, |
| receiver_->last_output_sample_rate_hz()); |
| } |
| } |
| |
// Fixture variant that disables NetEq's time-stretching operations ("fax
// mode") so that output timestamps advance deterministically and can be
// asserted on exactly.
class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
 protected:
  AcmReceiverTestFaxModeOldApi() {
    config_.neteq_config.for_test_no_time_stretching = true;
  }

  // Encodes silence with |codec|, decodes it through |receiver_|, and checks
  // every field of each output AudioFrame: timestamp progression, duration,
  // sample rate, channel count, speech type, VAD activity and mute flag.
  void RunVerifyAudioFrame(const SdpAudioFormat& codec) {
    // Make sure "fax mode" is enabled. This will avoid delay changes unless the
    // packet-loss concealment is made. We do this in order to make the
    // timestamp increments predictable; in normal mode, NetEq may decide to do
    // accelerate or pre-emptive expand operations after some time, offsetting
    // the timestamp.
    EXPECT_TRUE(config_.neteq_config.for_test_no_time_stretching);

    constexpr int payload_type = 17;  // Arbitrary dynamic payload type.
    receiver_->SetCodecs({{payload_type, codec}});

    const AudioCodecInfo info = SetEncoder(payload_type, codec);
    const int output_sample_rate_hz = info.sample_rate_hz;
    const size_t output_channels = info.num_channels;
    const size_t samples_per_ms = rtc::checked_cast<size_t>(
        rtc::CheckedDivExact(output_sample_rate_hz, 1000));
    // NOTE(review): for the codecs used by the callers (PCMU 8k, iSAC 16k,
    // Opus 48k), decoded silence appears to be classified as active VAD only
    // above 16 kHz (i.e. Opus) -- confirm if more codecs are added here.
    const AudioFrame::VADActivity expected_vad_activity =
        output_sample_rate_hz > 16000 ? AudioFrame::kVadActive
                                      : AudioFrame::kVadPassive;

    // Expect the first output timestamp to be 5*fs/8000 samples before the
    // first inserted timestamp (because of NetEq's look-ahead). (This value is
    // defined in Expand::overlap_length_.)
    uint32_t expected_output_ts =
        last_packet_send_timestamp_ -
        rtc::CheckedDivExact(5 * output_sample_rate_hz, 8000);

    AudioFrame frame;
    bool muted;
    EXPECT_EQ(0, receiver_->GetAudio(output_sample_rate_hz, &frame, &muted));
    // Expect timestamp = 0 before first packet is inserted.
    EXPECT_EQ(0u, frame.timestamp_);
    // Insert five packets and verify every pulled 10 ms frame.
    for (int i = 0; i < 5; ++i) {
      const int num_10ms_frames = InsertOnePacketOfSilence(info);
      for (int k = 0; k < num_10ms_frames; ++k) {
        EXPECT_EQ(0,
                  receiver_->GetAudio(output_sample_rate_hz, &frame, &muted));
        EXPECT_EQ(expected_output_ts, frame.timestamp_);
        expected_output_ts += rtc::checked_cast<uint32_t>(10 * samples_per_ms);
        EXPECT_EQ(10 * samples_per_ms, frame.samples_per_channel_);
        EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_);
        EXPECT_EQ(output_channels, frame.num_channels_);
        EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_);
        EXPECT_EQ(expected_vad_activity, frame.vad_activity_);
        EXPECT_FALSE(muted);
      }
    }
  }
};
| |
#if defined(WEBRTC_ANDROID)
#define MAYBE_VerifyAudioFramePCMU DISABLED_VerifyAudioFramePCMU
#else
#define MAYBE_VerifyAudioFramePCMU VerifyAudioFramePCMU
#endif
// Verifies the output AudioFrame fields for mono PCMU at 8 kHz.
TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFramePCMU) {
  RunVerifyAudioFrame({"PCMU", 8000, 1});
}
| |
#if defined(WEBRTC_ANDROID)
#define MAYBE_VerifyAudioFrameISAC DISABLED_VerifyAudioFrameISAC
#else
#define MAYBE_VerifyAudioFrameISAC VerifyAudioFrameISAC
#endif
// Verifies the output AudioFrame fields for mono iSAC at 16 kHz.
TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameISAC) {
  RunVerifyAudioFrame({"ISAC", 16000, 1});
}
| |
#if defined(WEBRTC_ANDROID)
#define MAYBE_VerifyAudioFrameOpus DISABLED_VerifyAudioFrameOpus
#else
#define MAYBE_VerifyAudioFrameOpus VerifyAudioFrameOpus
#endif
// Verifies the output AudioFrame fields for stereo Opus at 48 kHz.
TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) {
  RunVerifyAudioFrame({"opus", 48000, 2});
}
| |
| #if defined(WEBRTC_ANDROID) |
| #define MAYBE_PostdecodingVad DISABLED_PostdecodingVad |
| #else |
| #define MAYBE_PostdecodingVad PostdecodingVad |
| #endif |
| TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) { |
| EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad); |
| constexpr int payload_type = 34; |
| const SdpAudioFormat codec = {"L16", 16000, 1}; |
| const AudioCodecInfo info = SetEncoder(payload_type, codec); |
| receiver_->SetCodecs({{payload_type, codec}}); |
| constexpr int kNumPackets = 5; |
| AudioFrame frame; |
| for (int n = 0; n < kNumPackets; ++n) { |
| const int num_10ms_frames = InsertOnePacketOfSilence(info); |
| for (int k = 0; k < num_10ms_frames; ++k) { |
| bool muted; |
| ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted)); |
| } |
| } |
| EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_); |
| } |
| |
// Fixture variant with post-decode VAD disabled; the receiver should then
// report kVadUnknown instead of an actual VAD decision.
class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
 protected:
  AcmReceiverTestPostDecodeVadPassiveOldApi() {
    config_.neteq_config.enable_post_decode_vad = false;
  }
};
| |
| #if defined(WEBRTC_ANDROID) |
| #define MAYBE_PostdecodingVad DISABLED_PostdecodingVad |
| #else |
| #define MAYBE_PostdecodingVad PostdecodingVad |
| #endif |
| TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) { |
| EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad); |
| constexpr int payload_type = 34; |
| const SdpAudioFormat codec = {"L16", 16000, 1}; |
| const AudioCodecInfo info = SetEncoder(payload_type, codec); |
| auto const value = encoder_factory_->QueryAudioEncoder(codec); |
| ASSERT_TRUE(value.has_value()); |
| receiver_->SetCodecs({{payload_type, codec}}); |
| const int kNumPackets = 5; |
| AudioFrame frame; |
| for (int n = 0; n < kNumPackets; ++n) { |
| const int num_10ms_frames = InsertOnePacketOfSilence(info); |
| for (int k = 0; k < num_10ms_frames; ++k) { |
| bool muted; |
| ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted)); |
| } |
| } |
| EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_); |
| } |
| |
#if defined(WEBRTC_ANDROID)
#define MAYBE_LastAudioCodec DISABLED_LastAudioCodec
#else
#define MAYBE_LastAudioCodec LastAudioCodec
#endif
#if defined(WEBRTC_CODEC_ISAC)
// Verifies that LastDecoder() and last_packet_sample_rate_hz() track the most
// recently received *audio* payload, and stay undefined while only DTX
// (comfort-noise) packets have been received.
TEST_F(AcmReceiverTestOldApi, MAYBE_LastAudioCodec) {
  const std::map<int, SdpAudioFormat> codecs = {{0, {"ISAC", 16000, 1}},
                                                {1, {"PCMA", 8000, 1}},
                                                {2, {"ISAC", 32000, 1}},
                                                {3, {"L16", 32000, 1}}};
  // Maps clock rate (Hz) to the CN payload type used at that rate.
  const std::map<int, int> cng_payload_types = {
      {8000, 100}, {16000, 101}, {32000, 102}};
  {
    // Register the audio codecs plus one CN entry per clock rate.
    std::map<int, SdpAudioFormat> receive_codecs = codecs;
    for (const auto& cng_type : cng_payload_types) {
      receive_codecs.emplace(std::make_pair(
          cng_type.second, SdpAudioFormat("CN", cng_type.first, 1)));
    }
    receiver_->SetCodecs(receive_codecs);
  }

  // No audio payload is received.
  EXPECT_EQ(absl::nullopt, receiver_->LastDecoder());

  // Start with sending DTX.
  packet_sent_ = false;
  InsertOnePacketOfSilence(
      SetEncoder(0, codecs.at(0), cng_payload_types));  // Enough to test
                                                        // with one codec.
  ASSERT_TRUE(packet_sent_);
  EXPECT_EQ(AudioFrameType::kAudioFrameCN, last_frame_type_);

  // Has received, only, DTX. Last Audio codec is undefined.
  EXPECT_EQ(absl::nullopt, receiver_->LastDecoder());
  EXPECT_EQ(absl::nullopt, receiver_->last_packet_sample_rate_hz());

  // Cycle through every codec: first send plain speech, then DTX, checking
  // the reported "last" codec and sample rate after each stage.
  for (size_t i = 0; i < codecs.size(); ++i) {
    // Set DTX off to send audio payload.
    packet_sent_ = false;
    const int payload_type = rtc::checked_cast<int>(i);
    const AudioCodecInfo info_without_cng =
        SetEncoder(payload_type, codecs.at(i));
    InsertOnePacketOfSilence(info_without_cng);

    // Sanity check if Actually an audio payload received, and it should be
    // of type "speech."
    ASSERT_TRUE(packet_sent_);
    ASSERT_EQ(AudioFrameType::kAudioFrameSpeech, last_frame_type_);
    EXPECT_EQ(info_without_cng.sample_rate_hz,
              receiver_->last_packet_sample_rate_hz());

    // Set VAD on to send DTX. Then check if the "Last Audio codec" returns
    // the expected codec. Encode repeatedly until a DTX is sent.
    const AudioCodecInfo info_with_cng =
        SetEncoder(payload_type, codecs.at(i), cng_payload_types);
    while (last_frame_type_ != AudioFrameType::kAudioFrameCN) {
      packet_sent_ = false;
      InsertOnePacketOfSilence(info_with_cng);
      ASSERT_TRUE(packet_sent_);
    }
    EXPECT_EQ(info_with_cng.sample_rate_hz,
              receiver_->last_packet_sample_rate_hz());
    EXPECT_EQ(codecs.at(i), receiver_->LastDecoder()->second);
  }
}
#endif
| |
| // Check if the statistics are initialized correctly. Before any call to ACM |
| // all fields have to be zero. |
| #if defined(WEBRTC_ANDROID) |
| #define MAYBE_InitializedToZero DISABLED_InitializedToZero |
| #else |
| #define MAYBE_InitializedToZero InitializedToZero |
| #endif |
| TEST_F(AcmReceiverTestOldApi, MAYBE_InitializedToZero) { |
| AudioDecodingCallStats stats; |
| receiver_->GetDecodingCallStatistics(&stats); |
| EXPECT_EQ(0, stats.calls_to_neteq); |
| EXPECT_EQ(0, stats.calls_to_silence_generator); |
| EXPECT_EQ(0, stats.decoded_normal); |
| EXPECT_EQ(0, stats.decoded_cng); |
| EXPECT_EQ(0, stats.decoded_neteq_plc); |
| EXPECT_EQ(0, stats.decoded_plc_cng); |
| EXPECT_EQ(0, stats.decoded_muted_output); |
| } |
| |
| // Insert some packets and pull audio. Check statistics are valid. Then, |
| // simulate packet loss and check if PLC and PLC-to-CNG statistics are |
| // correctly updated. |
| #if defined(WEBRTC_ANDROID) |
| #define MAYBE_NetEqCalls DISABLED_NetEqCalls |
| #else |
| #define MAYBE_NetEqCalls NetEqCalls |
| #endif |
| TEST_F(AcmReceiverTestOldApi, MAYBE_NetEqCalls) { |
| AudioDecodingCallStats stats; |
| const int kNumNormalCalls = 10; |
| const int kSampleRateHz = 16000; |
| const int kNumSamples10ms = kSampleRateHz / 100; |
| const int kFrameSizeMs = 10; // Multiple of 10. |
| const int kFrameSizeSamples = kFrameSizeMs / 10 * kNumSamples10ms; |
| const int kPayloadSizeBytes = kFrameSizeSamples * sizeof(int16_t); |
| const uint8_t kPayloadType = 111; |
| RTPHeader rtp_header; |
| AudioFrame audio_frame; |
| bool muted; |
| |
| receiver_->SetCodecs( |
| {{kPayloadType, SdpAudioFormat("L16", kSampleRateHz, 1)}}); |
| rtp_header.sequenceNumber = 0xABCD; |
| rtp_header.timestamp = 0xABCDEF01; |
| rtp_header.payloadType = kPayloadType; |
| rtp_header.markerBit = false; |
| rtp_header.ssrc = 0x1234; |
| rtp_header.numCSRCs = 0; |
| rtp_header.payload_type_frequency = kSampleRateHz; |
| |
| for (int num_calls = 0; num_calls < kNumNormalCalls; ++num_calls) { |
| const uint8_t kPayload[kPayloadSizeBytes] = {0}; |
| ASSERT_EQ(0, receiver_->InsertPacket(rtp_header, kPayload)); |
| ++rtp_header.sequenceNumber; |
| rtp_header.timestamp += kFrameSizeSamples; |
| ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted)); |
| EXPECT_FALSE(muted); |
| } |
| receiver_->GetDecodingCallStatistics(&stats); |
| EXPECT_EQ(kNumNormalCalls, stats.calls_to_neteq); |
| EXPECT_EQ(0, stats.calls_to_silence_generator); |
| EXPECT_EQ(kNumNormalCalls, stats.decoded_normal); |
| EXPECT_EQ(0, stats.decoded_cng); |
| EXPECT_EQ(0, stats.decoded_neteq_plc); |
| EXPECT_EQ(0, stats.decoded_plc_cng); |
| EXPECT_EQ(0, stats.decoded_muted_output); |
| |
| const int kNumPlc = 3; |
| const int kNumPlcCng = 5; |
| |
| // Simulate packet-loss. NetEq first performs PLC then PLC fades to CNG. |
| for (int n = 0; n < kNumPlc + kNumPlcCng; ++n) { |
| ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted)); |
| EXPECT_FALSE(muted); |
| } |
| receiver_->GetDecodingCallStatistics(&stats); |
| EXPECT_EQ(kNumNormalCalls + kNumPlc + kNumPlcCng, stats.calls_to_neteq); |
| EXPECT_EQ(0, stats.calls_to_silence_generator); |
| EXPECT_EQ(kNumNormalCalls, stats.decoded_normal); |
| EXPECT_EQ(0, stats.decoded_cng); |
| EXPECT_EQ(kNumPlc, stats.decoded_neteq_plc); |
| EXPECT_EQ(kNumPlcCng, stats.decoded_plc_cng); |
| EXPECT_EQ(0, stats.decoded_muted_output); |
| // TODO(henrik.lundin) Add a test with muted state enabled. |
| } |
| |
| } // namespace acm2 |
| |
| } // namespace webrtc |