| /* |
| * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| #include <math.h> |
| #include <stdio.h> |
| |
| #include <algorithm> |
| #include <limits> |
| #include <memory> |
| #include <queue> |
| |
| #include "common_audio/include/audio_util.h" |
| #include "common_audio/resampler/include/push_resampler.h" |
| #include "common_audio/resampler/push_sinc_resampler.h" |
| #include "common_audio/signal_processing/include/signal_processing_library.h" |
| #include "modules/audio_processing/aec_dump/aec_dump_factory.h" |
| #include "modules/audio_processing/audio_processing_impl.h" |
| #include "modules/audio_processing/common.h" |
| #include "modules/audio_processing/include/audio_processing.h" |
| #include "modules/audio_processing/include/mock_audio_processing.h" |
| #include "modules/audio_processing/test/protobuf_utils.h" |
| #include "modules/audio_processing/test/test_utils.h" |
| #include "rtc_base/arraysize.h" |
| #include "rtc_base/checks.h" |
| #include "rtc_base/fake_clock.h" |
| #include "rtc_base/gtest_prod_util.h" |
| #include "rtc_base/ignore_wundef.h" |
| #include "rtc_base/numerics/safe_conversions.h" |
| #include "rtc_base/numerics/safe_minmax.h" |
| #include "rtc_base/protobuf_utils.h" |
| #include "rtc_base/ref_counted_object.h" |
| #include "rtc_base/strings/string_builder.h" |
| #include "rtc_base/swap_queue.h" |
| #include "rtc_base/system/arch.h" |
| #include "rtc_base/task_queue.h" |
| #include "rtc_base/thread.h" |
| #include "test/gtest.h" |
| #include "test/testsupport/file_utils.h" |
| |
| RTC_PUSH_IGNORING_WUNDEF() |
| #ifdef WEBRTC_ANDROID_PLATFORM_BUILD |
| #include "external/webrtc/webrtc/modules/audio_processing/test/unittest.pb.h" |
| #else |
| #include "modules/audio_processing/test/unittest.pb.h" |
| #endif |
| RTC_POP_IGNORING_WUNDEF() |
| |
| namespace webrtc { |
| namespace { |
| |
| // TODO(ekmeyerson): Switch to using StreamConfig and ProcessingConfig where |
| // applicable. |
| |
// When false (the typical case), the output data is compared with the results
// stored to file. When true, the stored file should be updated instead.
// TODO(bjornv): Selecting this with the command-line switch --write_ref_data
// is not feasible until the functionality has been re-implemented; see the
// comment at the bottom of this file. Until then the value has to be hard
// coded here.
bool write_ref_data = false;

// Channel counts and sample rates (the latter look like Hz) shared by the
// tests in this file.
constexpr int32_t kChannels[] = {1, 2};
constexpr int kSampleRates[] = {8000, 16000, 32000, 48000};
| |
// Sample rates used for processing, selected by the build profile.
#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
// Android doesn't support 48kHz.
constexpr int kProcessSampleRates[] = {8000, 16000, 32000};
#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
constexpr int kProcessSampleRates[] = {8000, 16000, 32000, 48000};
#endif

// Identifies which stream a file belongs to. The numeric value is streamed
// into the file names built by OutputFilePath().
enum StreamDirection {
  kForward = 0,
  kReverse
};
| |
| void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) { |
| ChannelBuffer<int16_t> cb_int(cb->num_frames(), |
| cb->num_channels()); |
| Deinterleave(int_data, |
| cb->num_frames(), |
| cb->num_channels(), |
| cb_int.channels()); |
| for (size_t i = 0; i < cb->num_channels(); ++i) { |
| S16ToFloat(cb_int.channels()[i], |
| cb->num_frames(), |
| cb->channels()[i]); |
| } |
| } |
| |
| void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { |
| ConvertToFloat(frame.data(), cb); |
| } |
| |
| // Number of channels including the keyboard channel. |
| size_t TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) { |
| switch (layout) { |
| case AudioProcessing::kMono: |
| return 1; |
| case AudioProcessing::kMonoAndKeyboard: |
| case AudioProcessing::kStereo: |
| return 2; |
| case AudioProcessing::kStereoAndKeyboard: |
| return 3; |
| } |
| RTC_NOTREACHED(); |
| return 0; |
| } |
| |
// Rounds |value| toward zero to the nearest multiple of 10.
int TruncateToMultipleOf10(int value) {
  // |value % 10| carries the sign of |value|, so subtracting it truncates
  // toward zero exactly like (value / 10) * 10.
  const int remainder = value % 10;
  return value - remainder;
}
| |
// Downmixes interleaved stereo float samples to mono by averaging each
// left/right pair. |stereo| holds 2 * |samples_per_channel| values and |mono|
// receives |samples_per_channel| values.
void MixStereoToMono(const float* stereo, float* mono,
                     size_t samples_per_channel) {
  const float* pair = stereo;
  for (size_t n = 0; n < samples_per_channel; ++n, pair += 2) {
    mono[n] = (pair[0] + pair[1]) / 2;
  }
}
| |
// Downmixes interleaved stereo 16-bit samples to mono by averaging each
// left/right pair (sum shifted right by one). Output sample n is written only
// after input samples 2n and 2n + 1 have been read, so |mono| may alias
// |stereo| for an in-place downmix.
void MixStereoToMono(const int16_t* stereo, int16_t* mono,
                     size_t samples_per_channel) {
  for (size_t n = 0; n < samples_per_channel; ++n) {
    const int left = stereo[2 * n];
    const int right = stereo[2 * n + 1];
    mono[n] = (left + right) >> 1;
  }
}
| |
// Overwrites the right sample of every interleaved stereo pair with the left
// sample of the same pair.
void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) {
  int16_t* const end = stereo + 2 * samples_per_channel;
  for (int16_t* pair = stereo; pair != end; pair += 2) {
    pair[1] = pair[0];
  }
}
| |
| void VerifyChannelsAreEqual(const int16_t* stereo, size_t samples_per_channel) { |
| for (size_t i = 0; i < samples_per_channel; i++) { |
| EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]); |
| } |
| } |
| |
| void SetFrameTo(AudioFrame* frame, int16_t value) { |
| int16_t* frame_data = frame->mutable_data(); |
| for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; |
| ++i) { |
| frame_data[i] = value; |
| } |
| } |
| |
| void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) { |
| ASSERT_EQ(2u, frame->num_channels_); |
| int16_t* frame_data = frame->mutable_data(); |
| for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { |
| frame_data[i] = left; |
| frame_data[i + 1] = right; |
| } |
| } |
| |
| void ScaleFrame(AudioFrame* frame, float scale) { |
| int16_t* frame_data = frame->mutable_data(); |
| for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; |
| ++i) { |
| frame_data[i] = FloatS16ToS16(frame_data[i] * scale); |
| } |
| } |
| |
| bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { |
| if (frame1.samples_per_channel_ != frame2.samples_per_channel_) { |
| return false; |
| } |
| if (frame1.num_channels_ != frame2.num_channels_) { |
| return false; |
| } |
| if (memcmp(frame1.data(), frame2.data(), |
| frame1.samples_per_channel_ * frame1.num_channels_ * |
| sizeof(int16_t))) { |
| return false; |
| } |
| return true; |
| } |
| |
| void EnableAllAPComponents(AudioProcessing* ap) { |
| AudioProcessing::Config apm_config = ap->GetConfig(); |
| apm_config.echo_canceller.enabled = true; |
| #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) |
| apm_config.echo_canceller.mobile_mode = true; |
| |
| EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveDigital)); |
| EXPECT_NOERR(ap->gain_control()->Enable(true)); |
| #elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| apm_config.echo_canceller.mobile_mode = false; |
| apm_config.echo_canceller.legacy_moderate_suppression_level = true; |
| |
| EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); |
| EXPECT_NOERR(ap->gain_control()->set_analog_level_limits(0, 255)); |
| EXPECT_NOERR(ap->gain_control()->Enable(true)); |
| #endif |
| |
| apm_config.high_pass_filter.enabled = true; |
| apm_config.level_estimation.enabled = true; |
| ap->ApplyConfig(apm_config); |
| |
| EXPECT_NOERR(ap->level_estimator()->Enable(true)); |
| EXPECT_NOERR(ap->noise_suppression()->Enable(true)); |
| |
| EXPECT_NOERR(ap->voice_detection()->Enable(true)); |
| } |
| |
// These functions are only used by ApmTest.Process.

// Returns |a| when it is positive and its negation otherwise.
template <class T>
T AbsValue(T a) {
  if (a > 0) {
    return a;
  }
  return -a;
}
| |
| int16_t MaxAudioFrame(const AudioFrame& frame) { |
| const size_t length = frame.samples_per_channel_ * frame.num_channels_; |
| const int16_t* frame_data = frame.data(); |
| int16_t max_data = AbsValue(frame_data[0]); |
| for (size_t i = 1; i < length; i++) { |
| max_data = std::max(max_data, AbsValue(frame_data[i])); |
| } |
| |
| return max_data; |
| } |
| |
| void OpenFileAndWriteMessage(const std::string& filename, |
| const MessageLite& msg) { |
| FILE* file = fopen(filename.c_str(), "wb"); |
| ASSERT_TRUE(file != NULL); |
| |
| int32_t size = rtc::checked_cast<int32_t>(msg.ByteSizeLong()); |
| ASSERT_GT(size, 0); |
| std::unique_ptr<uint8_t[]> array(new uint8_t[size]); |
| ASSERT_TRUE(msg.SerializeToArray(array.get(), size)); |
| |
| ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file)); |
| ASSERT_EQ(static_cast<size_t>(size), |
| fwrite(array.get(), sizeof(array[0]), size, file)); |
| fclose(file); |
| } |
| |
| std::string ResourceFilePath(const std::string& name, int sample_rate_hz) { |
| rtc::StringBuilder ss; |
| // Resource files are all stereo. |
| ss << name << sample_rate_hz / 1000 << "_stereo"; |
| return test::ResourcePath(ss.str(), "pcm"); |
| } |
| |
// Temporary filenames unique to this process, keyed by the name built in
// OutputFilePath(). They allow these tests to run in parallel: each process
// needs to run in isolation, so processes must not compete for the same
// filenames.
std::map<std::string, std::string> temp_filenames;
| |
| std::string OutputFilePath(const std::string& name, |
| int input_rate, |
| int output_rate, |
| int reverse_input_rate, |
| int reverse_output_rate, |
| size_t num_input_channels, |
| size_t num_output_channels, |
| size_t num_reverse_input_channels, |
| size_t num_reverse_output_channels, |
| StreamDirection file_direction) { |
| rtc::StringBuilder ss; |
| ss << name << "_i" << num_input_channels << "_" << input_rate / 1000 << "_ir" |
| << num_reverse_input_channels << "_" << reverse_input_rate / 1000 << "_"; |
| if (num_output_channels == 1) { |
| ss << "mono"; |
| } else if (num_output_channels == 2) { |
| ss << "stereo"; |
| } else { |
| RTC_NOTREACHED(); |
| } |
| ss << output_rate / 1000; |
| if (num_reverse_output_channels == 1) { |
| ss << "_rmono"; |
| } else if (num_reverse_output_channels == 2) { |
| ss << "_rstereo"; |
| } else { |
| RTC_NOTREACHED(); |
| } |
| ss << reverse_output_rate / 1000; |
| ss << "_d" << file_direction << "_pcm"; |
| |
| std::string filename = ss.str(); |
| if (temp_filenames[filename].empty()) |
| temp_filenames[filename] = test::TempFilename(test::OutputPath(), filename); |
| return temp_filenames[filename]; |
| } |
| |
| void ClearTempFiles() { |
| for (auto& kv : temp_filenames) |
| remove(kv.second.c_str()); |
| } |
| |
| // Only remove "out" files. Keep "ref" files. |
| void ClearTempOutFiles() { |
| for (auto it = temp_filenames.begin(); it != temp_filenames.end();) { |
| const std::string& filename = it->first; |
| if (filename.substr(0, 3).compare("out") == 0) { |
| remove(it->second.c_str()); |
| temp_filenames.erase(it++); |
| } else { |
| it++; |
| } |
| } |
| } |
| |
| void OpenFileAndReadMessage(const std::string& filename, MessageLite* msg) { |
| FILE* file = fopen(filename.c_str(), "rb"); |
| ASSERT_TRUE(file != NULL); |
| ReadMessageFromFile(file, msg); |
| fclose(file); |
| } |
| |
| // Reads a 10 ms chunk of int16 interleaved audio from the given (assumed |
| // stereo) file, converts to deinterleaved float (optionally downmixing) and |
| // returns the result in |cb|. Returns false if the file ended (or on error) and |
| // true otherwise. |
| // |
| // |int_data| and |float_data| are just temporary space that must be |
| // sufficiently large to hold the 10 ms chunk. |
| bool ReadChunk(FILE* file, int16_t* int_data, float* float_data, |
| ChannelBuffer<float>* cb) { |
| // The files always contain stereo audio. |
| size_t frame_size = cb->num_frames() * 2; |
| size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file); |
| if (read_count != frame_size) { |
| // Check that the file really ended. |
| RTC_DCHECK(feof(file)); |
| return false; // This is expected. |
| } |
| |
| S16ToFloat(int_data, frame_size, float_data); |
| if (cb->num_channels() == 1) { |
| MixStereoToMono(float_data, cb->channels()[0], cb->num_frames()); |
| } else { |
| Deinterleave(float_data, cb->num_frames(), 2, |
| cb->channels()); |
| } |
| |
| return true; |
| } |
| |
| class ApmTest : public ::testing::Test { |
| protected: |
| ApmTest(); |
| virtual void SetUp(); |
| virtual void TearDown(); |
| |
| static void SetUpTestCase() { |
| } |
| |
| static void TearDownTestCase() { |
| ClearTempFiles(); |
| } |
| |
| // Used to select between int and float interface tests. |
| enum Format { |
| kIntFormat, |
| kFloatFormat |
| }; |
| |
| void Init(int sample_rate_hz, |
| int output_sample_rate_hz, |
| int reverse_sample_rate_hz, |
| size_t num_input_channels, |
| size_t num_output_channels, |
| size_t num_reverse_channels, |
| bool open_output_file); |
| void Init(AudioProcessing* ap); |
| void EnableAllComponents(); |
| bool ReadFrame(FILE* file, AudioFrame* frame); |
| bool ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb); |
| void ReadFrameWithRewind(FILE* file, AudioFrame* frame); |
| void ReadFrameWithRewind(FILE* file, AudioFrame* frame, |
| ChannelBuffer<float>* cb); |
| void ProcessWithDefaultStreamParameters(AudioFrame* frame); |
| void ProcessDelayVerificationTest(int delay_ms, int system_delay_ms, |
| int delay_min, int delay_max); |
| void TestChangingChannelsInt16Interface( |
| size_t num_channels, |
| AudioProcessing::Error expected_return); |
| void TestChangingForwardChannels(size_t num_in_channels, |
| size_t num_out_channels, |
| AudioProcessing::Error expected_return); |
| void TestChangingReverseChannels(size_t num_rev_channels, |
| AudioProcessing::Error expected_return); |
| void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate); |
| void RunManualVolumeChangeIsPossibleTest(int sample_rate); |
| void StreamParametersTest(Format format); |
| int ProcessStreamChooser(Format format); |
| int AnalyzeReverseStreamChooser(Format format); |
| void ProcessDebugDump(const std::string& in_filename, |
| const std::string& out_filename, |
| Format format, |
| int max_size_bytes); |
| void VerifyDebugDumpTest(Format format); |
| |
| const std::string output_path_; |
| const std::string ref_filename_; |
| std::unique_ptr<AudioProcessing> apm_; |
| AudioFrame* frame_; |
| AudioFrame* revframe_; |
| std::unique_ptr<ChannelBuffer<float> > float_cb_; |
| std::unique_ptr<ChannelBuffer<float> > revfloat_cb_; |
| int output_sample_rate_hz_; |
| size_t num_output_channels_; |
| FILE* far_file_; |
| FILE* near_file_; |
| FILE* out_file_; |
| }; |
| |
| ApmTest::ApmTest() |
| : output_path_(test::OutputPath()), |
| #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) |
| ref_filename_(test::ResourcePath("audio_processing/output_data_fixed", |
| "pb")), |
| #elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| #if defined(WEBRTC_MAC) |
| // A different file for Mac is needed because on this platform the AEC |
| // constant |kFixedDelayMs| value is 20 and not 50 as it is on the rest. |
| ref_filename_(test::ResourcePath("audio_processing/output_data_mac", |
| "pb")), |
| #else |
| ref_filename_(test::ResourcePath("audio_processing/output_data_float", |
| "pb")), |
| #endif |
| #endif |
| frame_(NULL), |
| revframe_(NULL), |
| output_sample_rate_hz_(0), |
| num_output_channels_(0), |
| far_file_(NULL), |
| near_file_(NULL), |
| out_file_(NULL) { |
| Config config; |
| config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); |
| apm_.reset(AudioProcessingBuilder().Create(config)); |
| } |
| |
| void ApmTest::SetUp() { |
| ASSERT_TRUE(apm_.get() != NULL); |
| |
| frame_ = new AudioFrame(); |
| revframe_ = new AudioFrame(); |
| |
| Init(32000, 32000, 32000, 2, 2, 2, false); |
| } |
| |
| void ApmTest::TearDown() { |
| if (frame_) { |
| delete frame_; |
| } |
| frame_ = NULL; |
| |
| if (revframe_) { |
| delete revframe_; |
| } |
| revframe_ = NULL; |
| |
| if (far_file_) { |
| ASSERT_EQ(0, fclose(far_file_)); |
| } |
| far_file_ = NULL; |
| |
| if (near_file_) { |
| ASSERT_EQ(0, fclose(near_file_)); |
| } |
| near_file_ = NULL; |
| |
| if (out_file_) { |
| ASSERT_EQ(0, fclose(out_file_)); |
| } |
| out_file_ = NULL; |
| } |
| |
| void ApmTest::Init(AudioProcessing* ap) { |
| ASSERT_EQ(kNoErr, |
| ap->Initialize( |
| {{{frame_->sample_rate_hz_, frame_->num_channels_}, |
| {output_sample_rate_hz_, num_output_channels_}, |
| {revframe_->sample_rate_hz_, revframe_->num_channels_}, |
| {revframe_->sample_rate_hz_, revframe_->num_channels_}}})); |
| } |
| |
| void ApmTest::Init(int sample_rate_hz, |
| int output_sample_rate_hz, |
| int reverse_sample_rate_hz, |
| size_t num_input_channels, |
| size_t num_output_channels, |
| size_t num_reverse_channels, |
| bool open_output_file) { |
| SetContainerFormat(sample_rate_hz, num_input_channels, frame_, &float_cb_); |
| output_sample_rate_hz_ = output_sample_rate_hz; |
| num_output_channels_ = num_output_channels; |
| |
| SetContainerFormat(reverse_sample_rate_hz, num_reverse_channels, revframe_, |
| &revfloat_cb_); |
| Init(apm_.get()); |
| |
| if (far_file_) { |
| ASSERT_EQ(0, fclose(far_file_)); |
| } |
| std::string filename = ResourceFilePath("far", sample_rate_hz); |
| far_file_ = fopen(filename.c_str(), "rb"); |
| ASSERT_TRUE(far_file_ != NULL) << "Could not open file " << |
| filename << "\n"; |
| |
| if (near_file_) { |
| ASSERT_EQ(0, fclose(near_file_)); |
| } |
| filename = ResourceFilePath("near", sample_rate_hz); |
| near_file_ = fopen(filename.c_str(), "rb"); |
| ASSERT_TRUE(near_file_ != NULL) << "Could not open file " << |
| filename << "\n"; |
| |
| if (open_output_file) { |
| if (out_file_) { |
| ASSERT_EQ(0, fclose(out_file_)); |
| } |
| filename = OutputFilePath( |
| "out", sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz, |
| reverse_sample_rate_hz, num_input_channels, num_output_channels, |
| num_reverse_channels, num_reverse_channels, kForward); |
| out_file_ = fopen(filename.c_str(), "wb"); |
| ASSERT_TRUE(out_file_ != NULL) << "Could not open file " << |
| filename << "\n"; |
| } |
| } |
| |
| void ApmTest::EnableAllComponents() { |
| EnableAllAPComponents(apm_.get()); |
| } |
| |
| bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame, |
| ChannelBuffer<float>* cb) { |
| // The files always contain stereo audio. |
| size_t frame_size = frame->samples_per_channel_ * 2; |
| size_t read_count = fread(frame->mutable_data(), |
| sizeof(int16_t), |
| frame_size, |
| file); |
| if (read_count != frame_size) { |
| // Check that the file really ended. |
| EXPECT_NE(0, feof(file)); |
| return false; // This is expected. |
| } |
| |
| if (frame->num_channels_ == 1) { |
| MixStereoToMono(frame->data(), frame->mutable_data(), |
| frame->samples_per_channel_); |
| } |
| |
| if (cb) { |
| ConvertToFloat(*frame, cb); |
| } |
| return true; |
| } |
| |
| bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame) { |
| return ReadFrame(file, frame, NULL); |
| } |
| |
| // If the end of the file has been reached, rewind it and attempt to read the |
| // frame again. |
| void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame, |
| ChannelBuffer<float>* cb) { |
| if (!ReadFrame(near_file_, frame_, cb)) { |
| rewind(near_file_); |
| ASSERT_TRUE(ReadFrame(near_file_, frame_, cb)); |
| } |
| } |
| |
| void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame) { |
| ReadFrameWithRewind(file, frame, NULL); |
| } |
| |
| void ApmTest::ProcessWithDefaultStreamParameters(AudioFrame* frame) { |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0)); |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(127)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame)); |
| } |
| |
| int ApmTest::ProcessStreamChooser(Format format) { |
| if (format == kIntFormat) { |
| return apm_->ProcessStream(frame_); |
| } |
| return apm_->ProcessStream(float_cb_->channels(), |
| frame_->samples_per_channel_, |
| frame_->sample_rate_hz_, |
| LayoutFromChannels(frame_->num_channels_), |
| output_sample_rate_hz_, |
| LayoutFromChannels(num_output_channels_), |
| float_cb_->channels()); |
| } |
| |
| int ApmTest::AnalyzeReverseStreamChooser(Format format) { |
| if (format == kIntFormat) { |
| return apm_->ProcessReverseStream(revframe_); |
| } |
| return apm_->AnalyzeReverseStream( |
| revfloat_cb_->channels(), |
| revframe_->samples_per_channel_, |
| revframe_->sample_rate_hz_, |
| LayoutFromChannels(revframe_->num_channels_)); |
| } |
| |
| void ApmTest::ProcessDelayVerificationTest(int delay_ms, int system_delay_ms, |
| int delay_min, int delay_max) { |
| // The |revframe_| and |frame_| should include the proper frame information, |
| // hence can be used for extracting information. |
| AudioFrame tmp_frame; |
| std::queue<AudioFrame*> frame_queue; |
| bool causal = true; |
| |
| tmp_frame.CopyFrom(*revframe_); |
| SetFrameTo(&tmp_frame, 0); |
| |
| EXPECT_EQ(apm_->kNoError, apm_->Initialize()); |
| // Initialize the |frame_queue| with empty frames. |
| int frame_delay = delay_ms / 10; |
| while (frame_delay < 0) { |
| AudioFrame* frame = new AudioFrame(); |
| frame->CopyFrom(tmp_frame); |
| frame_queue.push(frame); |
| frame_delay++; |
| causal = false; |
| } |
| while (frame_delay > 0) { |
| AudioFrame* frame = new AudioFrame(); |
| frame->CopyFrom(tmp_frame); |
| frame_queue.push(frame); |
| frame_delay--; |
| } |
| // Run for 4.5 seconds, skipping statistics from the first 2.5 seconds. We |
| // need enough frames with audio to have reliable estimates, but as few as |
| // possible to keep processing time down. 4.5 seconds seemed to be a good |
| // compromise for this recording. |
| for (int frame_count = 0; frame_count < 450; ++frame_count) { |
| AudioFrame* frame = new AudioFrame(); |
| frame->CopyFrom(tmp_frame); |
| // Use the near end recording, since that has more speech in it. |
| ASSERT_TRUE(ReadFrame(near_file_, frame)); |
| frame_queue.push(frame); |
| AudioFrame* reverse_frame = frame; |
| AudioFrame* process_frame = frame_queue.front(); |
| if (!causal) { |
| reverse_frame = frame_queue.front(); |
| // When we call ProcessStream() the frame is modified, so we can't use the |
| // pointer directly when things are non-causal. Use an intermediate frame |
| // and copy the data. |
| process_frame = &tmp_frame; |
| process_frame->CopyFrom(*frame); |
| } |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(reverse_frame)); |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(system_delay_ms)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(process_frame)); |
| frame = frame_queue.front(); |
| frame_queue.pop(); |
| delete frame; |
| |
| if (frame_count == 250) { |
| // Discard the first delay metrics to avoid convergence effects. |
| static_cast<void>(apm_->GetStatistics(true /* has_remote_tracks */)); |
| } |
| } |
| |
| rewind(near_file_); |
| while (!frame_queue.empty()) { |
| AudioFrame* frame = frame_queue.front(); |
| frame_queue.pop(); |
| delete frame; |
| } |
| // Calculate expected delay estimate and acceptable regions. Further, |
| // limit them w.r.t. AEC delay estimation support. |
| const size_t samples_per_ms = |
| rtc::SafeMin<size_t>(16u, frame_->samples_per_channel_ / 10); |
| const int expected_median = |
| rtc::SafeClamp<int>(delay_ms - system_delay_ms, delay_min, delay_max); |
| const int expected_median_high = rtc::SafeClamp<int>( |
| expected_median + rtc::dchecked_cast<int>(96 / samples_per_ms), delay_min, |
| delay_max); |
| const int expected_median_low = rtc::SafeClamp<int>( |
| expected_median - rtc::dchecked_cast<int>(96 / samples_per_ms), delay_min, |
| delay_max); |
| // Verify delay metrics. |
| AudioProcessingStats stats = |
| apm_->GetStatistics(true /* has_remote_tracks */); |
| ASSERT_TRUE(stats.delay_median_ms.has_value()); |
| int32_t median = *stats.delay_median_ms; |
| EXPECT_GE(expected_median_high, median); |
| EXPECT_LE(expected_median_low, median); |
| } |
| |
| void ApmTest::StreamParametersTest(Format format) { |
| // No errors when the components are disabled. |
| EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); |
| |
| // -- Missing AGC level -- |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); |
| EXPECT_EQ(apm_->kStreamParameterNotSetError, |
| ProcessStreamChooser(format)); |
| |
| // Resets after successful ProcessStream(). |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(127)); |
| EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); |
| EXPECT_EQ(apm_->kStreamParameterNotSetError, |
| ProcessStreamChooser(format)); |
| |
| // Other stream parameters set correctly. |
| AudioProcessing::Config apm_config = apm_->GetConfig(); |
| apm_config.echo_canceller.enabled = true; |
| apm_config.echo_canceller.mobile_mode = false; |
| apm_->ApplyConfig(apm_config); |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); |
| EXPECT_EQ(apm_->kStreamParameterNotSetError, |
| ProcessStreamChooser(format)); |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); |
| |
| // -- Missing delay -- |
| EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); |
| EXPECT_EQ(apm_->kStreamParameterNotSetError, |
| ProcessStreamChooser(format)); |
| |
| // Resets after successful ProcessStream(). |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); |
| EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); |
| EXPECT_EQ(apm_->kStreamParameterNotSetError, |
| ProcessStreamChooser(format)); |
| |
| // Other stream parameters set correctly. |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(127)); |
| EXPECT_EQ(apm_->kStreamParameterNotSetError, |
| ProcessStreamChooser(format)); |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); |
| |
| // -- No stream parameters -- |
| EXPECT_EQ(apm_->kNoError, |
| AnalyzeReverseStreamChooser(format)); |
| EXPECT_EQ(apm_->kStreamParameterNotSetError, |
| ProcessStreamChooser(format)); |
| |
| // -- All there -- |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(127)); |
| EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); |
| } |
| |
| TEST_F(ApmTest, StreamParametersInt) { |
| StreamParametersTest(kIntFormat); |
| } |
| |
| TEST_F(ApmTest, StreamParametersFloat) { |
| StreamParametersTest(kFloatFormat); |
| } |
| |
| TEST_F(ApmTest, DefaultDelayOffsetIsZero) { |
| EXPECT_EQ(0, apm_->delay_offset_ms()); |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(50)); |
| EXPECT_EQ(50, apm_->stream_delay_ms()); |
| } |
| |
| TEST_F(ApmTest, DelayOffsetWithLimitsIsSetProperly) { |
| // High limit of 500 ms. |
| apm_->set_delay_offset_ms(100); |
| EXPECT_EQ(100, apm_->delay_offset_ms()); |
| EXPECT_EQ(apm_->kBadStreamParameterWarning, apm_->set_stream_delay_ms(450)); |
| EXPECT_EQ(500, apm_->stream_delay_ms()); |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); |
| EXPECT_EQ(200, apm_->stream_delay_ms()); |
| |
| // Low limit of 0 ms. |
| apm_->set_delay_offset_ms(-50); |
| EXPECT_EQ(-50, apm_->delay_offset_ms()); |
| EXPECT_EQ(apm_->kBadStreamParameterWarning, apm_->set_stream_delay_ms(20)); |
| EXPECT_EQ(0, apm_->stream_delay_ms()); |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); |
| EXPECT_EQ(50, apm_->stream_delay_ms()); |
| } |
| |
| void ApmTest::TestChangingChannelsInt16Interface( |
| size_t num_channels, |
| AudioProcessing::Error expected_return) { |
| frame_->num_channels_ = num_channels; |
| EXPECT_EQ(expected_return, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(expected_return, apm_->ProcessReverseStream(frame_)); |
| } |
| |
| void ApmTest::TestChangingForwardChannels( |
| size_t num_in_channels, |
| size_t num_out_channels, |
| AudioProcessing::Error expected_return) { |
| const StreamConfig input_stream = {frame_->sample_rate_hz_, num_in_channels}; |
| const StreamConfig output_stream = {output_sample_rate_hz_, num_out_channels}; |
| |
| EXPECT_EQ(expected_return, |
| apm_->ProcessStream(float_cb_->channels(), input_stream, |
| output_stream, float_cb_->channels())); |
| } |
| |
| void ApmTest::TestChangingReverseChannels( |
| size_t num_rev_channels, |
| AudioProcessing::Error expected_return) { |
| const ProcessingConfig processing_config = { |
| {{frame_->sample_rate_hz_, apm_->num_input_channels()}, |
| {output_sample_rate_hz_, apm_->num_output_channels()}, |
| {frame_->sample_rate_hz_, num_rev_channels}, |
| {frame_->sample_rate_hz_, num_rev_channels}}}; |
| |
| EXPECT_EQ( |
| expected_return, |
| apm_->ProcessReverseStream( |
| float_cb_->channels(), processing_config.reverse_input_stream(), |
| processing_config.reverse_output_stream(), float_cb_->channels())); |
| } |
| |
| TEST_F(ApmTest, ChannelsInt16Interface) { |
| // Testing number of invalid and valid channels. |
| Init(16000, 16000, 16000, 4, 4, 4, false); |
| |
| TestChangingChannelsInt16Interface(0, apm_->kBadNumberChannelsError); |
| |
| for (size_t i = 1; i < 4; i++) { |
| TestChangingChannelsInt16Interface(i, kNoErr); |
| EXPECT_EQ(i, apm_->num_input_channels()); |
| } |
| } |
| |
| TEST_F(ApmTest, Channels) { |
| // Testing number of invalid and valid channels. |
| Init(16000, 16000, 16000, 4, 4, 4, false); |
| |
| TestChangingForwardChannels(0, 1, apm_->kBadNumberChannelsError); |
| TestChangingReverseChannels(0, apm_->kBadNumberChannelsError); |
| |
| for (size_t i = 1; i < 4; ++i) { |
| for (size_t j = 0; j < 1; ++j) { |
| // Output channels much be one or match input channels. |
| if (j == 1 || i == j) { |
| TestChangingForwardChannels(i, j, kNoErr); |
| TestChangingReverseChannels(i, kNoErr); |
| |
| EXPECT_EQ(i, apm_->num_input_channels()); |
| EXPECT_EQ(j, apm_->num_output_channels()); |
| // The number of reverse channels used for processing to is always 1. |
| EXPECT_EQ(1u, apm_->num_reverse_channels()); |
| } else { |
| TestChangingForwardChannels(i, j, |
| AudioProcessing::kBadNumberChannelsError); |
| } |
| } |
| } |
| } |
| |
| TEST_F(ApmTest, SampleRatesInt) { |
| // Testing invalid sample rates |
| SetContainerFormat(10000, 2, frame_, &float_cb_); |
| EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(kIntFormat)); |
| // Testing valid sample rates |
| int fs[] = {8000, 16000, 32000, 48000}; |
| for (size_t i = 0; i < arraysize(fs); i++) { |
| SetContainerFormat(fs[i], 2, frame_, &float_cb_); |
| EXPECT_NOERR(ProcessStreamChooser(kIntFormat)); |
| } |
| } |
| |
| TEST_F(ApmTest, DISABLED_EchoCancellationReportsCorrectDelays) { |
| // TODO(bjornv): Fix this test to work with DA-AEC. |
| // Enable AEC only. |
| AudioProcessing::Config apm_config = apm_->GetConfig(); |
| apm_config.echo_canceller.enabled = true; |
| apm_config.echo_canceller.mobile_mode = false; |
| apm_->ApplyConfig(apm_config); |
| Config config; |
| config.Set<DelayAgnostic>(new DelayAgnostic(false)); |
| apm_->SetExtraOptions(config); |
| |
| // Internally in the AEC the amount of lookahead the delay estimation can |
| // handle is 15 blocks and the maximum delay is set to 60 blocks. |
| const int kLookaheadBlocks = 15; |
| const int kMaxDelayBlocks = 60; |
| // The AEC has a startup time before it actually starts to process. This |
| // procedure can flush the internal far-end buffer, which of course affects |
| // the delay estimation. Therefore, we set a system_delay high enough to |
| // avoid that. The smallest system_delay you can report without flushing the |
| // buffer is 66 ms in 8 kHz. |
| // |
| // It is known that for 16 kHz (and 32 kHz) sampling frequency there is an |
| // additional stuffing of 8 ms on the fly, but it seems to have no impact on |
| // delay estimation. This should be noted though. In case of test failure, |
| // this could be the cause. |
| const int kSystemDelayMs = 66; |
| // Test a couple of corner cases and verify that the estimated delay is |
| // within a valid region (set to +-1.5 blocks). Note that these cases are |
| // sampling frequency dependent. |
| for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) { |
| Init(kProcessSampleRates[i], |
| kProcessSampleRates[i], |
| kProcessSampleRates[i], |
| 2, |
| 2, |
| 2, |
| false); |
| // Sampling frequency dependent variables. |
| const int num_ms_per_block = |
| std::max(4, static_cast<int>(640 / frame_->samples_per_channel_)); |
| const int delay_min_ms = -kLookaheadBlocks * num_ms_per_block; |
| const int delay_max_ms = (kMaxDelayBlocks - 1) * num_ms_per_block; |
| |
| // 1) Verify correct delay estimate at lookahead boundary. |
| int delay_ms = TruncateToMultipleOf10(kSystemDelayMs + delay_min_ms); |
| ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, |
| delay_max_ms); |
| // 2) A delay less than maximum lookahead should give an delay estimate at |
| // the boundary (= -kLookaheadBlocks * num_ms_per_block). |
| delay_ms -= 20; |
| ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, |
| delay_max_ms); |
| // 3) Three values around zero delay. Note that we need to compensate for |
| // the fake system_delay. |
| delay_ms = TruncateToMultipleOf10(kSystemDelayMs - 10); |
| ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, |
| delay_max_ms); |
| delay_ms = TruncateToMultipleOf10(kSystemDelayMs); |
| ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, |
| delay_max_ms); |
| delay_ms = TruncateToMultipleOf10(kSystemDelayMs + 10); |
| ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, |
| delay_max_ms); |
| // 4) Verify correct delay estimate at maximum delay boundary. |
| delay_ms = TruncateToMultipleOf10(kSystemDelayMs + delay_max_ms); |
| ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, |
| delay_max_ms); |
| // 5) A delay above the maximum delay should give an estimate at the |
| // boundary (= (kMaxDelayBlocks - 1) * num_ms_per_block). |
| delay_ms += 20; |
| ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, |
| delay_max_ms); |
| } |
| } |
| |
| TEST_F(ApmTest, GainControl) { |
| // Testing gain modes |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_mode( |
| apm_->gain_control()->mode())); |
| |
| GainControl::Mode mode[] = { |
| GainControl::kAdaptiveAnalog, |
| GainControl::kAdaptiveDigital, |
| GainControl::kFixedDigital |
| }; |
| for (size_t i = 0; i < arraysize(mode); i++) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_mode(mode[i])); |
| EXPECT_EQ(mode[i], apm_->gain_control()->mode()); |
| } |
| // Testing invalid target levels |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_target_level_dbfs(-3)); |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_target_level_dbfs(-40)); |
| // Testing valid target levels |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_target_level_dbfs( |
| apm_->gain_control()->target_level_dbfs())); |
| |
| int level_dbfs[] = {0, 6, 31}; |
| for (size_t i = 0; i < arraysize(level_dbfs); i++) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_target_level_dbfs(level_dbfs[i])); |
| EXPECT_EQ(level_dbfs[i], apm_->gain_control()->target_level_dbfs()); |
| } |
| |
| // Testing invalid compression gains |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_compression_gain_db(-1)); |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_compression_gain_db(100)); |
| |
| // Testing valid compression gains |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_compression_gain_db( |
| apm_->gain_control()->compression_gain_db())); |
| |
| int gain_db[] = {0, 10, 90}; |
| for (size_t i = 0; i < arraysize(gain_db); i++) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_compression_gain_db(gain_db[i])); |
| EXPECT_EQ(gain_db[i], apm_->gain_control()->compression_gain_db()); |
| } |
| |
| // Testing limiter off/on |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(false)); |
| EXPECT_FALSE(apm_->gain_control()->is_limiter_enabled()); |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(true)); |
| EXPECT_TRUE(apm_->gain_control()->is_limiter_enabled()); |
| |
| // Testing invalid level limits |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_analog_level_limits(-1, 512)); |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_analog_level_limits(100000, 512)); |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_analog_level_limits(512, -1)); |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_analog_level_limits(512, 100000)); |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->gain_control()->set_analog_level_limits(512, 255)); |
| |
| // Testing valid level limits |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_analog_level_limits( |
| apm_->gain_control()->analog_level_minimum(), |
| apm_->gain_control()->analog_level_maximum())); |
| |
| int min_level[] = {0, 255, 1024}; |
| for (size_t i = 0; i < arraysize(min_level); i++) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_analog_level_limits(min_level[i], 1024)); |
| EXPECT_EQ(min_level[i], apm_->gain_control()->analog_level_minimum()); |
| } |
| |
| int max_level[] = {0, 1024, 65535}; |
| for (size_t i = 0; i < arraysize(min_level); i++) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_analog_level_limits(0, max_level[i])); |
| EXPECT_EQ(max_level[i], apm_->gain_control()->analog_level_maximum()); |
| } |
| |
| // TODO(ajm): stream_is_saturated() and stream_analog_level() |
| |
| // Turn AGC off |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); |
| EXPECT_FALSE(apm_->gain_control()->is_enabled()); |
| } |
| |
| void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) { |
| Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); |
| |
| int out_analog_level = 0; |
| for (int i = 0; i < 2000; ++i) { |
| ReadFrameWithRewind(near_file_, frame_); |
| // Ensure the audio is at a low level, so the AGC will try to increase it. |
| ScaleFrame(frame_, 0.25); |
| |
| // Always pass in the same volume. |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(100)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| out_analog_level = apm_->gain_control()->stream_analog_level(); |
| } |
| |
| // Ensure the AGC is still able to reach the maximum. |
| EXPECT_EQ(255, out_analog_level); |
| } |
| |
| // Verifies that despite volume slider quantization, the AGC can continue to |
| // increase its volume. |
| TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) { |
| for (size_t i = 0; i < arraysize(kSampleRates); ++i) { |
| RunQuantizedVolumeDoesNotGetStuckTest(kSampleRates[i]); |
| } |
| } |
| |
| void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) { |
| Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); |
| EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); |
| |
| int out_analog_level = 100; |
| for (int i = 0; i < 1000; ++i) { |
| ReadFrameWithRewind(near_file_, frame_); |
| // Ensure the audio is at a low level, so the AGC will try to increase it. |
| ScaleFrame(frame_, 0.25); |
| |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(out_analog_level)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| out_analog_level = apm_->gain_control()->stream_analog_level(); |
| } |
| |
| // Ensure the volume was raised. |
| EXPECT_GT(out_analog_level, 100); |
| int highest_level_reached = out_analog_level; |
| // Simulate a user manual volume change. |
| out_analog_level = 100; |
| |
| for (int i = 0; i < 300; ++i) { |
| ReadFrameWithRewind(near_file_, frame_); |
| ScaleFrame(frame_, 0.25); |
| |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(out_analog_level)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| out_analog_level = apm_->gain_control()->stream_analog_level(); |
| // Check that AGC respected the manually adjusted volume. |
| EXPECT_LT(out_analog_level, highest_level_reached); |
| } |
| // Check that the volume was still raised. |
| EXPECT_GT(out_analog_level, 100); |
| } |
| |
| TEST_F(ApmTest, ManualVolumeChangeIsPossible) { |
| for (size_t i = 0; i < arraysize(kSampleRates); ++i) { |
| RunManualVolumeChangeIsPossibleTest(kSampleRates[i]); |
| } |
| } |
| |
| TEST_F(ApmTest, NoiseSuppression) { |
| // Test valid suppression levels. |
| NoiseSuppression::Level level[] = { |
| NoiseSuppression::kLow, |
| NoiseSuppression::kModerate, |
| NoiseSuppression::kHigh, |
| NoiseSuppression::kVeryHigh |
| }; |
| for (size_t i = 0; i < arraysize(level); i++) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->noise_suppression()->set_level(level[i])); |
| EXPECT_EQ(level[i], apm_->noise_suppression()->level()); |
| } |
| |
| // Turn NS on/off |
| EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(true)); |
| EXPECT_TRUE(apm_->noise_suppression()->is_enabled()); |
| EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(false)); |
| EXPECT_FALSE(apm_->noise_suppression()->is_enabled()); |
| } |
| |
| TEST_F(ApmTest, HighPassFilter) { |
| // Turn HP filter on/off |
| AudioProcessing::Config apm_config; |
| apm_config.high_pass_filter.enabled = true; |
| apm_->ApplyConfig(apm_config); |
| apm_config.high_pass_filter.enabled = false; |
| apm_->ApplyConfig(apm_config); |
| } |
| |
| TEST_F(ApmTest, LevelEstimator) { |
| // Turn level estimator on/off |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); |
| EXPECT_FALSE(apm_->level_estimator()->is_enabled()); |
| |
| EXPECT_EQ(apm_->kNotEnabledError, apm_->level_estimator()->RMS()); |
| |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); |
| EXPECT_TRUE(apm_->level_estimator()->is_enabled()); |
| |
| // Run this test in wideband; in super-wb, the splitting filter distorts the |
| // audio enough to cause deviation from the expectation for small values. |
| frame_->samples_per_channel_ = 160; |
| frame_->num_channels_ = 2; |
| frame_->sample_rate_hz_ = 16000; |
| |
| // Min value if no frames have been processed. |
| EXPECT_EQ(127, apm_->level_estimator()->RMS()); |
| |
| // Min value on zero frames. |
| SetFrameTo(frame_, 0); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(127, apm_->level_estimator()->RMS()); |
| |
| // Try a few RMS values. |
| // (These also test that the value resets after retrieving it.) |
| SetFrameTo(frame_, 32767); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(0, apm_->level_estimator()->RMS()); |
| |
| SetFrameTo(frame_, 30000); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(1, apm_->level_estimator()->RMS()); |
| |
| SetFrameTo(frame_, 10000); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(10, apm_->level_estimator()->RMS()); |
| |
| SetFrameTo(frame_, 10); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(70, apm_->level_estimator()->RMS()); |
| |
| // Verify reset after enable/disable. |
| SetFrameTo(frame_, 32767); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); |
| SetFrameTo(frame_, 1); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(90, apm_->level_estimator()->RMS()); |
| |
| // Verify reset after initialize. |
| SetFrameTo(frame_, 32767); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->Initialize()); |
| SetFrameTo(frame_, 1); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(90, apm_->level_estimator()->RMS()); |
| } |
| |
| TEST_F(ApmTest, VoiceDetection) { |
| // Test external VAD |
| EXPECT_EQ(apm_->kNoError, |
| apm_->voice_detection()->set_stream_has_voice(true)); |
| EXPECT_TRUE(apm_->voice_detection()->stream_has_voice()); |
| EXPECT_EQ(apm_->kNoError, |
| apm_->voice_detection()->set_stream_has_voice(false)); |
| EXPECT_FALSE(apm_->voice_detection()->stream_has_voice()); |
| |
| // Test valid likelihoods |
| VoiceDetection::Likelihood likelihood[] = { |
| VoiceDetection::kVeryLowLikelihood, |
| VoiceDetection::kLowLikelihood, |
| VoiceDetection::kModerateLikelihood, |
| VoiceDetection::kHighLikelihood |
| }; |
| for (size_t i = 0; i < arraysize(likelihood); i++) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->voice_detection()->set_likelihood(likelihood[i])); |
| EXPECT_EQ(likelihood[i], apm_->voice_detection()->likelihood()); |
| } |
| |
| /* TODO(bjornv): Enable once VAD supports other frame lengths than 10 ms |
| // Test invalid frame sizes |
| EXPECT_EQ(apm_->kBadParameterError, |
| apm_->voice_detection()->set_frame_size_ms(12)); |
| |
| // Test valid frame sizes |
| for (int i = 10; i <= 30; i += 10) { |
| EXPECT_EQ(apm_->kNoError, |
| apm_->voice_detection()->set_frame_size_ms(i)); |
| EXPECT_EQ(i, apm_->voice_detection()->frame_size_ms()); |
| } |
| */ |
| |
| // Turn VAD on/off |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); |
| EXPECT_TRUE(apm_->voice_detection()->is_enabled()); |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); |
| EXPECT_FALSE(apm_->voice_detection()->is_enabled()); |
| |
| // Test that AudioFrame activity is maintained when VAD is disabled. |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); |
| AudioFrame::VADActivity activity[] = { |
| AudioFrame::kVadActive, |
| AudioFrame::kVadPassive, |
| AudioFrame::kVadUnknown |
| }; |
| for (size_t i = 0; i < arraysize(activity); i++) { |
| frame_->vad_activity_ = activity[i]; |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(activity[i], frame_->vad_activity_); |
| } |
| |
| // Test that AudioFrame activity is set when VAD is enabled. |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); |
| frame_->vad_activity_ = AudioFrame::kVadUnknown; |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_NE(AudioFrame::kVadUnknown, frame_->vad_activity_); |
| |
| // TODO(bjornv): Add tests for streamed voice; stream_has_voice() |
| } |
| |
| TEST_F(ApmTest, AllProcessingDisabledByDefault) { |
| AudioProcessing::Config config = apm_->GetConfig(); |
| EXPECT_FALSE(config.echo_canceller.enabled); |
| EXPECT_FALSE(config.high_pass_filter.enabled); |
| EXPECT_FALSE(config.level_estimation.enabled); |
| EXPECT_FALSE(config.voice_detection.enabled); |
| EXPECT_FALSE(apm_->gain_control()->is_enabled()); |
| EXPECT_FALSE(apm_->level_estimator()->is_enabled()); |
| EXPECT_FALSE(apm_->noise_suppression()->is_enabled()); |
| EXPECT_FALSE(apm_->voice_detection()->is_enabled()); |
| } |
| |
| TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) { |
| for (size_t i = 0; i < arraysize(kSampleRates); i++) { |
| Init(kSampleRates[i], kSampleRates[i], kSampleRates[i], 2, 2, 2, false); |
| SetFrameTo(frame_, 1000, 2000); |
| AudioFrame frame_copy; |
| frame_copy.CopyFrom(*frame_); |
| for (int j = 0; j < 1000; j++) { |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(frame_)); |
| EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); |
| } |
| } |
| } |
| |
| TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledFloat) { |
| // Test that ProcessStream copies input to output even with no processing. |
| const size_t kSamples = 80; |
| const int sample_rate = 8000; |
| const float src[kSamples] = { |
| -1.0f, 0.0f, 1.0f |
| }; |
| float dest[kSamples] = {}; |
| |
| auto src_channels = &src[0]; |
| auto dest_channels = &dest[0]; |
| |
| apm_.reset(AudioProcessingBuilder().Create()); |
| EXPECT_NOERR(apm_->ProcessStream( |
| &src_channels, kSamples, sample_rate, LayoutFromChannels(1), |
| sample_rate, LayoutFromChannels(1), &dest_channels)); |
| |
| for (size_t i = 0; i < kSamples; ++i) { |
| EXPECT_EQ(src[i], dest[i]); |
| } |
| |
| // Same for ProcessReverseStream. |
| float rev_dest[kSamples] = {}; |
| auto rev_dest_channels = &rev_dest[0]; |
| |
| StreamConfig input_stream = {sample_rate, 1}; |
| StreamConfig output_stream = {sample_rate, 1}; |
| EXPECT_NOERR(apm_->ProcessReverseStream(&src_channels, input_stream, |
| output_stream, &rev_dest_channels)); |
| |
| for (size_t i = 0; i < kSamples; ++i) { |
| EXPECT_EQ(src[i], rev_dest[i]); |
| } |
| } |
| |
| TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { |
| EnableAllComponents(); |
| |
| for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) { |
| Init(kProcessSampleRates[i], |
| kProcessSampleRates[i], |
| kProcessSampleRates[i], |
| 2, |
| 2, |
| 2, |
| false); |
| int analog_level = 127; |
| ASSERT_EQ(0, feof(far_file_)); |
| ASSERT_EQ(0, feof(near_file_)); |
| while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) { |
| CopyLeftToRightChannel(revframe_->mutable_data(), |
| revframe_->samples_per_channel_); |
| |
| ASSERT_EQ(kNoErr, apm_->ProcessReverseStream(revframe_)); |
| |
| CopyLeftToRightChannel(frame_->mutable_data(), |
| frame_->samples_per_channel_); |
| frame_->vad_activity_ = AudioFrame::kVadUnknown; |
| |
| ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0)); |
| ASSERT_EQ(kNoErr, |
| apm_->gain_control()->set_stream_analog_level(analog_level)); |
| ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_)); |
| analog_level = apm_->gain_control()->stream_analog_level(); |
| |
| VerifyChannelsAreEqual(frame_->data(), frame_->samples_per_channel_); |
| } |
| rewind(far_file_); |
| rewind(near_file_); |
| } |
| } |
| |
| TEST_F(ApmTest, SplittingFilter) { |
| // Verify the filter is not active through undistorted audio when: |
| // 1. No components are enabled... |
| SetFrameTo(frame_, 1000); |
| AudioFrame frame_copy; |
| frame_copy.CopyFrom(*frame_); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); |
| |
| // 2. Only the level estimator is enabled... |
| SetFrameTo(frame_, 1000); |
| frame_copy.CopyFrom(*frame_); |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); |
| |
| // 3. Only VAD is enabled... |
| SetFrameTo(frame_, 1000); |
| frame_copy.CopyFrom(*frame_); |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); |
| |
| // 4. Only GetStatistics-reporting VAD is enabled... |
| SetFrameTo(frame_, 1000); |
| frame_copy.CopyFrom(*frame_); |
| auto apm_config = apm_->GetConfig(); |
| apm_config.voice_detection.enabled = true; |
| apm_->ApplyConfig(apm_config); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); |
| apm_config.voice_detection.enabled = false; |
| apm_->ApplyConfig(apm_config); |
| |
| // 5. Both VADs and the level estimator are enabled... |
| SetFrameTo(frame_, 1000); |
| frame_copy.CopyFrom(*frame_); |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); |
| apm_config.voice_detection.enabled = true; |
| apm_->ApplyConfig(apm_config); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); |
| EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); |
| EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); |
| apm_config.voice_detection.enabled = false; |
| apm_->ApplyConfig(apm_config); |
| |
| // Check the test is valid. We should have distortion from the filter |
| // when AEC is enabled (which won't affect the audio). |
| apm_config.echo_canceller.enabled = true; |
| apm_config.echo_canceller.mobile_mode = false; |
| apm_->ApplyConfig(apm_config); |
| frame_->samples_per_channel_ = 320; |
| frame_->num_channels_ = 2; |
| frame_->sample_rate_hz_ = 32000; |
| SetFrameTo(frame_, 1000); |
| frame_copy.CopyFrom(*frame_); |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_FALSE(FrameDataAreEqual(*frame_, frame_copy)); |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| void ApmTest::ProcessDebugDump(const std::string& in_filename, |
| const std::string& out_filename, |
| Format format, |
| int max_size_bytes) { |
| rtc::TaskQueue worker_queue("ApmTest_worker_queue"); |
| FILE* in_file = fopen(in_filename.c_str(), "rb"); |
| ASSERT_TRUE(in_file != NULL); |
| audioproc::Event event_msg; |
| bool first_init = true; |
| |
| while (ReadMessageFromFile(in_file, &event_msg)) { |
| if (event_msg.type() == audioproc::Event::INIT) { |
| const audioproc::Init msg = event_msg.init(); |
| int reverse_sample_rate = msg.sample_rate(); |
| if (msg.has_reverse_sample_rate()) { |
| reverse_sample_rate = msg.reverse_sample_rate(); |
| } |
| int output_sample_rate = msg.sample_rate(); |
| if (msg.has_output_sample_rate()) { |
| output_sample_rate = msg.output_sample_rate(); |
| } |
| |
| Init(msg.sample_rate(), |
| output_sample_rate, |
| reverse_sample_rate, |
| msg.num_input_channels(), |
| msg.num_output_channels(), |
| msg.num_reverse_channels(), |
| false); |
| if (first_init) { |
| // AttachAecDump() writes an additional init message. Don't start |
| // recording until after the first init to avoid the extra message. |
| auto aec_dump = |
| AecDumpFactory::Create(out_filename, max_size_bytes, &worker_queue); |
| EXPECT_TRUE(aec_dump); |
| apm_->AttachAecDump(std::move(aec_dump)); |
| first_init = false; |
| } |
| |
| } else if (event_msg.type() == audioproc::Event::REVERSE_STREAM) { |
| const audioproc::ReverseStream msg = event_msg.reverse_stream(); |
| |
| if (msg.channel_size() > 0) { |
| ASSERT_EQ(revframe_->num_channels_, |
| static_cast<size_t>(msg.channel_size())); |
| for (int i = 0; i < msg.channel_size(); ++i) { |
| memcpy(revfloat_cb_->channels()[i], |
| msg.channel(i).data(), |
| msg.channel(i).size()); |
| } |
| } else { |
| memcpy(revframe_->mutable_data(), msg.data().data(), msg.data().size()); |
| if (format == kFloatFormat) { |
| // We're using an int16 input file; convert to float. |
| ConvertToFloat(*revframe_, revfloat_cb_.get()); |
| } |
| } |
| AnalyzeReverseStreamChooser(format); |
| |
| } else if (event_msg.type() == audioproc::Event::STREAM) { |
| const audioproc::Stream msg = event_msg.stream(); |
| // ProcessStream could have changed this for the output frame. |
| frame_->num_channels_ = apm_->num_input_channels(); |
| |
| EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(msg.level())); |
| EXPECT_NOERR(apm_->set_stream_delay_ms(msg.delay())); |
| if (msg.has_keypress()) { |
| apm_->set_stream_key_pressed(msg.keypress()); |
| } else { |
| apm_->set_stream_key_pressed(true); |
| } |
| |
| if (msg.input_channel_size() > 0) { |
| ASSERT_EQ(frame_->num_channels_, |
| static_cast<size_t>(msg.input_channel_size())); |
| for (int i = 0; i < msg.input_channel_size(); ++i) { |
| memcpy(float_cb_->channels()[i], |
| msg.input_channel(i).data(), |
| msg.input_channel(i).size()); |
| } |
| } else { |
| memcpy(frame_->mutable_data(), msg.input_data().data(), |
| msg.input_data().size()); |
| if (format == kFloatFormat) { |
| // We're using an int16 input file; convert to float. |
| ConvertToFloat(*frame_, float_cb_.get()); |
| } |
| } |
| ProcessStreamChooser(format); |
| } |
| } |
| apm_->DetachAecDump(); |
| fclose(in_file); |
| } |
| |
| void ApmTest::VerifyDebugDumpTest(Format format) { |
| rtc::ScopedFakeClock fake_clock; |
| const std::string in_filename = test::ResourcePath("ref03", "aecdump"); |
| std::string format_string; |
| switch (format) { |
| case kIntFormat: |
| format_string = "_int"; |
| break; |
| case kFloatFormat: |
| format_string = "_float"; |
| break; |
| } |
| const std::string ref_filename = test::TempFilename( |
| test::OutputPath(), std::string("ref") + format_string + "_aecdump"); |
| const std::string out_filename = test::TempFilename( |
| test::OutputPath(), std::string("out") + format_string + "_aecdump"); |
| const std::string limited_filename = test::TempFilename( |
| test::OutputPath(), std::string("limited") + format_string + "_aecdump"); |
| const size_t logging_limit_bytes = 100000; |
| // We expect at least this many bytes in the created logfile. |
| const size_t logging_expected_bytes = 95000; |
| EnableAllComponents(); |
| ProcessDebugDump(in_filename, ref_filename, format, -1); |
| ProcessDebugDump(ref_filename, out_filename, format, -1); |
| ProcessDebugDump(ref_filename, limited_filename, format, logging_limit_bytes); |
| |
| FILE* ref_file = fopen(ref_filename.c_str(), "rb"); |
| FILE* out_file = fopen(out_filename.c_str(), "rb"); |
| FILE* limited_file = fopen(limited_filename.c_str(), "rb"); |
| ASSERT_TRUE(ref_file != NULL); |
| ASSERT_TRUE(out_file != NULL); |
| ASSERT_TRUE(limited_file != NULL); |
| std::unique_ptr<uint8_t[]> ref_bytes; |
| std::unique_ptr<uint8_t[]> out_bytes; |
| std::unique_ptr<uint8_t[]> limited_bytes; |
| |
| size_t ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes); |
| size_t out_size = ReadMessageBytesFromFile(out_file, &out_bytes); |
| size_t limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes); |
| size_t bytes_read = 0; |
| size_t bytes_read_limited = 0; |
| while (ref_size > 0 && out_size > 0) { |
| bytes_read += ref_size; |
| bytes_read_limited += limited_size; |
| EXPECT_EQ(ref_size, out_size); |
| EXPECT_GE(ref_size, limited_size); |
| EXPECT_EQ(0, memcmp(ref_bytes.get(), out_bytes.get(), ref_size)); |
| EXPECT_EQ(0, memcmp(ref_bytes.get(), limited_bytes.get(), limited_size)); |
| ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes); |
| out_size = ReadMessageBytesFromFile(out_file, &out_bytes); |
| limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes); |
| } |
| EXPECT_GT(bytes_read, 0u); |
| EXPECT_GT(bytes_read_limited, logging_expected_bytes); |
| EXPECT_LE(bytes_read_limited, logging_limit_bytes); |
| EXPECT_NE(0, feof(ref_file)); |
| EXPECT_NE(0, feof(out_file)); |
| EXPECT_NE(0, feof(limited_file)); |
| ASSERT_EQ(0, fclose(ref_file)); |
| ASSERT_EQ(0, fclose(out_file)); |
| ASSERT_EQ(0, fclose(limited_file)); |
| remove(ref_filename.c_str()); |
| remove(out_filename.c_str()); |
| remove(limited_filename.c_str()); |
| } |
| |
| TEST_F(ApmTest, VerifyDebugDumpInt) { |
| VerifyDebugDumpTest(kIntFormat); |
| } |
| |
| TEST_F(ApmTest, VerifyDebugDumpFloat) { |
| VerifyDebugDumpTest(kFloatFormat); |
| } |
| #endif |
| |
| // TODO(andrew): expand test to verify output. |
| TEST_F(ApmTest, DebugDump) { |
| rtc::TaskQueue worker_queue("ApmTest_worker_queue"); |
| const std::string filename = |
| test::TempFilename(test::OutputPath(), "debug_aec"); |
| { |
| auto aec_dump = AecDumpFactory::Create("", -1, &worker_queue); |
| EXPECT_FALSE(aec_dump); |
| } |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| // Stopping without having started should be OK. |
| apm_->DetachAecDump(); |
| |
| auto aec_dump = AecDumpFactory::Create(filename, -1, &worker_queue); |
| EXPECT_TRUE(aec_dump); |
| apm_->AttachAecDump(std::move(aec_dump)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(revframe_)); |
| apm_->DetachAecDump(); |
| |
| // Verify the file has been written. |
| FILE* fid = fopen(filename.c_str(), "r"); |
| ASSERT_TRUE(fid != NULL); |
| |
| // Clean it up. |
| ASSERT_EQ(0, fclose(fid)); |
| ASSERT_EQ(0, remove(filename.c_str())); |
| #else |
| // Verify the file has NOT been written. |
| ASSERT_TRUE(fopen(filename.c_str(), "r") == NULL); |
| #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP |
| } |
| |
| // TODO(andrew): expand test to verify output. |
| TEST_F(ApmTest, DebugDumpFromFileHandle) { |
| rtc::TaskQueue worker_queue("ApmTest_worker_queue"); |
| |
| const std::string filename = |
| test::TempFilename(test::OutputPath(), "debug_aec"); |
| FILE* fid = fopen(filename.c_str(), "w"); |
| ASSERT_TRUE(fid); |
| |
| #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP |
| // Stopping without having started should be OK. |
| apm_->DetachAecDump(); |
| |
| auto aec_dump = AecDumpFactory::Create(fid, -1, &worker_queue); |
| EXPECT_TRUE(aec_dump); |
| apm_->AttachAecDump(std::move(aec_dump)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(revframe_)); |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| apm_->DetachAecDump(); |
| |
| // Verify the file has been written. |
| fid = fopen(filename.c_str(), "r"); |
| ASSERT_TRUE(fid != NULL); |
| |
| // Clean it up. |
| ASSERT_EQ(0, fclose(fid)); |
| ASSERT_EQ(0, remove(filename.c_str())); |
| #else |
| ASSERT_EQ(0, fclose(fid)); |
| #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP |
| } |
| |
| TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) { |
| audioproc::OutputData ref_data; |
| OpenFileAndReadMessage(ref_filename_, &ref_data); |
| |
| Config config; |
| config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); |
| std::unique_ptr<AudioProcessing> fapm( |
| AudioProcessingBuilder().Create(config)); |
| EnableAllComponents(); |
| EnableAllAPComponents(fapm.get()); |
| for (int i = 0; i < ref_data.test_size(); i++) { |
| printf("Running test %d of %d...\n", i + 1, ref_data.test_size()); |
| |
| audioproc::Test* test = ref_data.mutable_test(i); |
| // TODO(ajm): Restore downmixing test cases. |
| if (test->num_input_channels() != test->num_output_channels()) |
| continue; |
| |
| const size_t num_render_channels = |
| static_cast<size_t>(test->num_reverse_channels()); |
| const size_t num_input_channels = |
| static_cast<size_t>(test->num_input_channels()); |
| const size_t num_output_channels = |
| static_cast<size_t>(test->num_output_channels()); |
| const size_t samples_per_channel = static_cast<size_t>( |
| test->sample_rate() * AudioProcessing::kChunkSizeMs / 1000); |
| |
| Init(test->sample_rate(), test->sample_rate(), test->sample_rate(), |
| num_input_channels, num_output_channels, num_render_channels, true); |
| Init(fapm.get()); |
| |
| ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels); |
| ChannelBuffer<int16_t> output_int16(samples_per_channel, |
| num_input_channels); |
| |
| int analog_level = 127; |
| size_t num_bad_chunks = 0; |
| while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) && |
| ReadFrame(near_file_, frame_, float_cb_.get())) { |
| frame_->vad_activity_ = AudioFrame::kVadUnknown; |
| |
| EXPECT_NOERR(apm_->ProcessReverseStream(revframe_)); |
| EXPECT_NOERR(fapm->AnalyzeReverseStream( |
| revfloat_cb_->channels(), |
| samples_per_channel, |
| test->sample_rate(), |
| LayoutFromChannels(num_render_channels))); |
| |
| EXPECT_NOERR(apm_->set_stream_delay_ms(0)); |
| EXPECT_NOERR(fapm->set_stream_delay_ms(0)); |
| EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(analog_level)); |
| EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level)); |
| |
| EXPECT_NOERR(apm_->ProcessStream(frame_)); |
| Deinterleave(frame_->data(), samples_per_channel, num_output_channels, |
| output_int16.channels()); |
| |
| EXPECT_NOERR(fapm->ProcessStream( |
| float_cb_->channels(), |
| samples_per_channel, |
| test->sample_rate(), |
| LayoutFromChannels(num_input_channels), |
| test->sample_rate(), |
| LayoutFromChannels(num_output_channels), |
| float_cb_->channels())); |
| for (size_t j = 0; j < num_output_channels; ++j) { |
| FloatToS16(float_cb_->channels()[j], |
| samples_per_channel, |
| output_cb.channels()[j]); |
| float variance = 0; |
| float snr = ComputeSNR(output_int16.channels()[j], |
| output_cb.channels()[j], |
| samples_per_channel, &variance); |
| |
| const float kVarianceThreshold = 20; |
| const float kSNRThreshold = 20; |
| |
| // Skip frames with low energy. |
| if (sqrt(variance) > kVarianceThreshold && snr < kSNRThreshold) { |
| ++num_bad_chunks; |
| } |
| } |
| |
| analog_level = fapm->gain_control()->stream_analog_level(); |
| EXPECT_EQ(apm_->gain_control()->stream_analog_level(), |
| fapm->gain_control()->stream_analog_level()); |
| EXPECT_NEAR(apm_->noise_suppression()->speech_probability(), |
| fapm->noise_suppression()->speech_probability(), |
| 0.01); |
| |
| // Reset in case of downmixing. |
| frame_->num_channels_ = static_cast<size_t>(test->num_input_channels()); |
| } |
| |
| #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| const size_t kMaxNumBadChunks = 0; |
| #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) |
| // There are a few chunks in the fixed-point profile that give low SNR. |
| // Listening confirmed the difference is acceptable. |
| const size_t kMaxNumBadChunks = 60; |
| #endif |
| EXPECT_LE(num_bad_chunks, kMaxNumBadChunks); |
| |
| rewind(far_file_); |
| rewind(near_file_); |
| } |
| } |
| |
| // TODO(andrew): Add a test to process a few frames with different combinations |
| // of enabled components. |
| |
| TEST_F(ApmTest, Process) { |
| GOOGLE_PROTOBUF_VERIFY_VERSION; |
| audioproc::OutputData ref_data; |
| |
| if (!write_ref_data) { |
| OpenFileAndReadMessage(ref_filename_, &ref_data); |
| } else { |
| // Write the desired tests to the protobuf reference file. |
| for (size_t i = 0; i < arraysize(kChannels); i++) { |
| for (size_t j = 0; j < arraysize(kChannels); j++) { |
| for (size_t l = 0; l < arraysize(kProcessSampleRates); l++) { |
| audioproc::Test* test = ref_data.add_test(); |
| test->set_num_reverse_channels(kChannels[i]); |
| test->set_num_input_channels(kChannels[j]); |
| test->set_num_output_channels(kChannels[j]); |
| test->set_sample_rate(kProcessSampleRates[l]); |
| test->set_use_aec_extended_filter(false); |
| } |
| } |
| } |
| #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| // To test the extended filter mode. |
| audioproc::Test* test = ref_data.add_test(); |
| test->set_num_reverse_channels(2); |
| test->set_num_input_channels(2); |
| test->set_num_output_channels(2); |
| test->set_sample_rate(AudioProcessing::kSampleRate32kHz); |
| test->set_use_aec_extended_filter(true); |
| #endif |
| } |
| |
| for (int i = 0; i < ref_data.test_size(); i++) { |
| printf("Running test %d of %d...\n", i + 1, ref_data.test_size()); |
| |
| audioproc::Test* test = ref_data.mutable_test(i); |
| // TODO(ajm): We no longer allow different input and output channels. Skip |
| // these tests for now, but they should be removed from the set. |
| if (test->num_input_channels() != test->num_output_channels()) |
| continue; |
| |
| Config config; |
| config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); |
| config.Set<ExtendedFilter>( |
| new ExtendedFilter(test->use_aec_extended_filter())); |
| apm_.reset(AudioProcessingBuilder().Create(config)); |
| |
| EnableAllComponents(); |
| |
| Init(test->sample_rate(), |
| test->sample_rate(), |
| test->sample_rate(), |
| static_cast<size_t>(test->num_input_channels()), |
| static_cast<size_t>(test->num_output_channels()), |
| static_cast<size_t>(test->num_reverse_channels()), |
| true); |
| |
| int frame_count = 0; |
| int has_voice_count = 0; |
| int is_saturated_count = 0; |
| int analog_level = 127; |
| int analog_level_average = 0; |
| int max_output_average = 0; |
| float ns_speech_prob_average = 0.0f; |
| float rms_dbfs_average = 0.0f; |
| #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| int stats_index = 0; |
| #endif |
| |
| while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) { |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(revframe_)); |
| |
| frame_->vad_activity_ = AudioFrame::kVadUnknown; |
| |
| EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0)); |
| EXPECT_EQ(apm_->kNoError, |
| apm_->gain_control()->set_stream_analog_level(analog_level)); |
| |
| EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); |
| |
| // Ensure the frame was downmixed properly. |
| EXPECT_EQ(static_cast<size_t>(test->num_output_channels()), |
| frame_->num_channels_); |
| |
| max_output_average += MaxAudioFrame(*frame_); |
| |
| analog_level = apm_->gain_control()->stream_analog_level(); |
| analog_level_average += analog_level; |
| if (apm_->gain_control()->stream_is_saturated()) { |
| is_saturated_count++; |
| } |
| if (apm_->voice_detection()->stream_has_voice()) { |
| has_voice_count++; |
| EXPECT_EQ(AudioFrame::kVadActive, frame_->vad_activity_); |
| } else { |
| EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_); |
| } |
| |
| ns_speech_prob_average += apm_->noise_suppression()->speech_probability(); |
| AudioProcessingStats stats = |
| apm_->GetStatistics(/*has_remote_tracks=*/false); |
| rms_dbfs_average += *stats.output_rms_dbfs; |
| |
| size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_; |
| size_t write_count = fwrite(frame_->data(), |
| sizeof(int16_t), |
| frame_size, |
| out_file_); |
| ASSERT_EQ(frame_size, write_count); |
| |
| // Reset in case of downmixing. |
| frame_->num_channels_ = static_cast<size_t>(test->num_input_channels()); |
| frame_count++; |
| |
| #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| const int kStatsAggregationFrameNum = 100; // 1 second. |
| if (frame_count % kStatsAggregationFrameNum == 0) { |
| // Get echo and delay metrics. |
| AudioProcessingStats stats = |
| apm_->GetStatistics(true /* has_remote_tracks */); |
| |
| // Echo metrics. |
| const float echo_return_loss = stats.echo_return_loss.value_or(-1.0f); |
| const float echo_return_loss_enhancement = |
| stats.echo_return_loss_enhancement.value_or(-1.0f); |
| const float divergent_filter_fraction = |
| stats.divergent_filter_fraction.value_or(-1.0f); |
| const float residual_echo_likelihood = |
| stats.residual_echo_likelihood.value_or(-1.0f); |
| const float residual_echo_likelihood_recent_max = |
| stats.residual_echo_likelihood_recent_max.value_or(-1.0f); |
| |
| // Delay metrics. |
| const int32_t delay_median_ms = stats.delay_median_ms.value_or(-1.0); |
| const int32_t delay_standard_deviation_ms = |
| stats.delay_standard_deviation_ms.value_or(-1.0); |
| |
| if (!write_ref_data) { |
| const audioproc::Test::EchoMetrics& reference = |
| test->echo_metrics(stats_index); |
| constexpr float kEpsilon = 0.01; |
| EXPECT_NEAR(echo_return_loss, reference.echo_return_loss(), kEpsilon); |
| EXPECT_NEAR(echo_return_loss_enhancement, |
| reference.echo_return_loss_enhancement(), kEpsilon); |
| EXPECT_NEAR(divergent_filter_fraction, |
| reference.divergent_filter_fraction(), kEpsilon); |
| EXPECT_NEAR(residual_echo_likelihood, |
| reference.residual_echo_likelihood(), kEpsilon); |
| EXPECT_NEAR(residual_echo_likelihood_recent_max, |
| reference.residual_echo_likelihood_recent_max(), |
| kEpsilon); |
| |
| const audioproc::Test::DelayMetrics& reference_delay = |
| test->delay_metrics(stats_index); |
| EXPECT_EQ(reference_delay.median(), delay_median_ms); |
| EXPECT_EQ(reference_delay.std(), delay_standard_deviation_ms); |
| |
| ++stats_index; |
| } else { |
| audioproc::Test::EchoMetrics* message_echo = test->add_echo_metrics(); |
| message_echo->set_echo_return_loss(echo_return_loss); |
| message_echo->set_echo_return_loss_enhancement( |
| echo_return_loss_enhancement); |
| message_echo->set_divergent_filter_fraction( |
| divergent_filter_fraction); |
| message_echo->set_residual_echo_likelihood(residual_echo_likelihood); |
| message_echo->set_residual_echo_likelihood_recent_max( |
| residual_echo_likelihood_recent_max); |
| audioproc::Test::DelayMetrics* message_delay = |
| test->add_delay_metrics(); |
| message_delay->set_median(delay_median_ms); |
| message_delay->set_std(delay_standard_deviation_ms); |
| } |
| } |
| #endif // defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE). |
| } |
| max_output_average /= frame_count; |
| analog_level_average /= frame_count; |
| ns_speech_prob_average /= frame_count; |
| rms_dbfs_average /= frame_count; |
| |
| if (!write_ref_data) { |
| const int kIntNear = 1; |
| // When running the test on a N7 we get a {2, 6} difference of |
| // |has_voice_count| and |max_output_average| is up to 18 higher. |
| // All numbers being consistently higher on N7 compare to ref_data. |
| // TODO(bjornv): If we start getting more of these offsets on Android we |
| // should consider a different approach. Either using one slack for all, |
| // or generate a separate android reference. |
| #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) |
| const int kHasVoiceCountOffset = 3; |
| const int kHasVoiceCountNear = 8; |
| const int kMaxOutputAverageOffset = 9; |
| const int kMaxOutputAverageNear = 26; |
| #else |
| const int kHasVoiceCountOffset = 0; |
| const int kHasVoiceCountNear = kIntNear; |
| const int kMaxOutputAverageOffset = 0; |
| const int kMaxOutputAverageNear = kIntNear; |
| #endif |
| EXPECT_NEAR(test->has_voice_count(), |
| has_voice_count - kHasVoiceCountOffset, |
| kHasVoiceCountNear); |
| EXPECT_NEAR(test->is_saturated_count(), is_saturated_count, kIntNear); |
| |
| EXPECT_NEAR(test->analog_level_average(), analog_level_average, kIntNear); |
| EXPECT_NEAR(test->max_output_average(), |
| max_output_average - kMaxOutputAverageOffset, |
| kMaxOutputAverageNear); |
| #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| const double kFloatNear = 0.0005; |
| EXPECT_NEAR(test->ns_speech_probability_average(), |
| ns_speech_prob_average, |
| kFloatNear); |
| EXPECT_NEAR(test->rms_dbfs_average(), rms_dbfs_average, kFloatNear); |
| #endif |
| } else { |
| test->set_has_voice_count(has_voice_count); |
| test->set_is_saturated_count(is_saturated_count); |
| |
| test->set_analog_level_average(analog_level_average); |
| test->set_max_output_average(max_output_average); |
| |
| #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
| EXPECT_LE(0.0f, ns_speech_prob_average); |
| EXPECT_GE(1.0f, ns_speech_prob_average); |
| test->set_ns_speech_probability_average(ns_speech_prob_average); |
| test->set_rms_dbfs_average(rms_dbfs_average); |
| #endif |
| } |
| |
| rewind(far_file_); |
| rewind(near_file_); |
| } |
| |
| if (write_ref_data) { |
| OpenFileAndWriteMessage(ref_filename_, ref_data); |
| } |
| } |
| |
| TEST_F(ApmTest, NoErrorsWithKeyboardChannel) { |
| struct ChannelFormat { |
| AudioProcessing::ChannelLayout in_layout; |
| AudioProcessing::ChannelLayout out_layout; |
| }; |
| ChannelFormat cf[] = { |
| {AudioProcessing::kMonoAndKeyboard, AudioProcessing::kMono}, |
| {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono}, |
| {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo}, |
| }; |
| |
| std::unique_ptr<AudioProcessing> ap(AudioProcessingBuilder().Create()); |
| // Enable one component just to ensure some processing takes place. |
| ap->noise_suppression()->Enable(true); |
| for (size_t i = 0; i < arraysize(cf); ++i) { |
| const int in_rate = 44100; |
| const int out_rate = 48000; |
| ChannelBuffer<float> in_cb(SamplesFromRate(in_rate), |
| TotalChannelsFromLayout(cf[i].in_layout)); |
| ChannelBuffer<float> out_cb(SamplesFromRate(out_rate), |
| ChannelsFromLayout(cf[i].out_layout)); |
| |
| // Run over a few chunks. |
| for (int j = 0; j < 10; ++j) { |
| EXPECT_NOERR(ap->ProcessStream( |
| in_cb.channels(), |
| in_cb.num_frames(), |
| in_rate, |
| cf[i].in_layout, |
| out_rate, |
| cf[i].out_layout, |
| out_cb.channels())); |
| } |
| } |
| } |
| |
// Compares the reference and test arrays over a region around the expected
// delay. Finds the delay with the highest SNR in that region and adds its
// variance and squared error to the supplied accumulators.
//
// |ref|, |test|     Sample arrays, each holding at least |length| elements.
// |length|          Number of samples in each array.
// |expected_delay|  Expected lag, in samples, of |test| relative to |ref|.
// |variance_acc|    Incremented by the sum of squared reference samples.
// |sq_error_acc|    Incremented by the sum of squared differences.
//
// If a candidate delay produces exactly zero error over a non-empty overlap,
// only the variance of that candidate is accumulated and the search stops.
// Candidate delays that leave no overlapping samples are not evaluated.
void UpdateBestSNR(const float* ref,
                   const float* test,
                   size_t length,
                   int expected_delay,
                   double* variance_acc,
                   double* sq_error_acc) {
  // lowest() rather than min(): for floating-point types min() is the
  // smallest *positive* value, so an SNR of exactly zero (silent reference,
  // non-silent output) would never be selected and its error would be lost.
  double best_snr = std::numeric_limits<double>::lowest();
  double best_variance = 0;
  double best_sq_error = 0;
  // Search over the nine candidate delays within +/- 4 samples of the
  // expected delay (clamped at zero).
  const int first_delay = std::max(expected_delay - 4, 0);
  const int last_delay = expected_delay + 4;
  for (int delay = first_delay; delay <= last_delay; ++delay) {
    // Stop once no samples overlap. Without this check |length - delay| would
    // wrap around as an unsigned value and read far out of bounds, and an
    // empty overlap would be mistaken for a perfect (zero error) match.
    if (static_cast<size_t>(delay) >= length) {
      break;
    }
    const size_t overlap = length - static_cast<size_t>(delay);

    double sq_error = 0;
    double variance = 0;
    for (size_t i = 0; i < overlap; ++i) {
      double error = test[i + delay] - ref[i];
      sq_error += error * error;
      variance += ref[i] * ref[i];
    }

    // A perfect match: infinite SNR, nothing can beat it.
    if (sq_error == 0) {
      *variance_acc += variance;
      return;
    }
    double snr = variance / sq_error;
    if (snr > best_snr) {
      best_snr = snr;
      best_variance = variance;
      best_sq_error = sq_error;
    }
  }

  *variance_acc += best_variance;
  *sq_error_acc += best_sq_error;
}
| |
| // Used to test a multitude of sample rate and channel combinations. It works |
| // by first producing a set of reference files (in SetUpTestCase) that are |
| // assumed to be correct, as the used parameters are verified by other tests |
| // in this collection. Primarily the reference files are all produced at |
| // "native" rates which do not involve any resampling. |
| |
| // Each test pass produces an output file with a particular format. The output |
| // is matched against the reference file closest to its internal processing |
| // format. If necessary the output is resampled back to its process format. |
| // Due to the resampling distortion, we don't expect identical results, but |
| // enforce SNR thresholds which vary depending on the format. 0 is a special |
| // case SNR which corresponds to inf, or zero error. |
| typedef std::tuple<int, int, int, int, double, double> AudioProcessingTestData; |
| class AudioProcessingTest |
| : public testing::TestWithParam<AudioProcessingTestData> { |
| public: |
| AudioProcessingTest() |
| : input_rate_(std::get<0>(GetParam())), |
| output_rate_(std::get<1>(GetParam())), |
| reverse_input_rate_(std::get<2>(GetParam())), |
| reverse_output_rate_(std::get<3>(GetParam())), |
| expected_snr_(std::get<4>(GetParam())), |
| expected_reverse_snr_(std::get<5>(GetParam())) {} |
| |
| virtual ~AudioProcessingTest() {} |
| |
| static void SetUpTestCase() { |
| // Create all needed output reference files. |
| const int kNativeRates[] = {8000, 16000, 32000, 48000}; |
| const size_t kNumChannels[] = {1, 2}; |
| for (size_t i = 0; i < arraysize(kNativeRates); ++i) { |
| for (size_t j = 0; j < arraysize(kNumChannels); ++j) { |
| for (size_t k = 0; k < arraysize(kNumChannels); ++k) { |
| // The reference files always have matching input and output channels. |
| ProcessFormat(kNativeRates[i], kNativeRates[i], kNativeRates[i], |
| kNativeRates[i], kNumChannels[j], kNumChannels[j], |
| kNumChannels[k], kNumChannels[k], "ref"); |
| } |
| } |
| } |
| } |
| |
| void TearDown() { |
| // Remove "out" files after each test. |
| ClearTempOutFiles(); |
| } |
| |
| static void TearDownTestCase() { |
| ClearTempFiles(); |
| } |
| |
| // Runs a process pass on files with the given parameters and dumps the output |
| // to a file specified with |output_file_prefix|. Both forward and reverse |
| // output streams are dumped. |
| static void ProcessFormat(int input_rate, |
| int output_rate, |
| int reverse_input_rate, |
| int reverse_output_rate, |
| size_t num_input_channels, |
| size_t num_output_channels, |
| size_t num_reverse_input_channels, |
| size_t num_reverse_output_channels, |
| const std::string& output_file_prefix) { |
| Config config; |
| config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); |
| std::unique_ptr<AudioProcessing> ap( |
| AudioProcessingBuilder().Create(config)); |
| EnableAllAPComponents(ap.get()); |
| |
| ProcessingConfig processing_config = { |
| {{input_rate, num_input_channels}, |
| {output_rate, num_output_channels}, |
| {reverse_input_rate, num_reverse_input_channels}, |
| {reverse_output_rate, num_reverse_output_channels}}}; |
| ap->Initialize(processing_config); |
| |
| FILE* far_file = |
| fopen(ResourceFilePath("far", reverse_input_rate).c_str(), "rb"); |
| FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb"); |
| FILE* out_file = |
| fopen(OutputFilePath(output_file_prefix, input_rate, output_rate, |
| reverse_input_rate, reverse_output_rate, |
| num_input_channels, num_output_channels, |
| num_reverse_input_channels, |
| num_reverse_output_channels, kForward).c_str(), |
| "wb"); |
| FILE* rev_out_file = |
| fopen(OutputFilePath(output_file_prefix, input_rate, output_rate, |
| reverse_input_rate, reverse_output_rate, |
| num_input_channels, num_output_channels, |
| num_reverse_input_channels, |
| num_reverse_output_channels, kReverse).c_str(), |
| "wb"); |
| ASSERT_TRUE(far_file != NULL); |
| ASSERT_TRUE(near_file != NULL); |
| ASSERT_TRUE(out_file != NULL); |
| ASSERT_TRUE(rev_out_file != NULL); |
| |
| ChannelBuffer<float> fwd_cb(SamplesFromRate(input_rate), |
| num_input_channels); |
| ChannelBuffer<float> rev_cb(SamplesFromRate(reverse_input_rate), |
| num_reverse_input_channels); |
| ChannelBuffer<float> out_cb(SamplesFromRate(output_rate), |
| num_output_channels); |
| ChannelBuffer<float> rev_out_cb(SamplesFromRate(reverse_output_rate), |
| num_reverse_output_channels); |
| |
| // Temporary buffers. |
| const int max_length = |
| 2 * std::max(std::max(out_cb.num_frames(), rev_out_cb.num_frames()), |
| std::max(fwd_cb.num_frames(), rev_cb.num_frames())); |
| std::unique_ptr<float[]> float_data(new float[max_length]); |
| std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]); |
| |
| int analog_level = 127; |
| while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) && |
| ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) { |
| EXPECT_NOERR(ap->ProcessReverseStream( |
| rev_cb.channels(), processing_config.reverse_input_stream(), |
| processing_config.reverse_output_stream(), rev_out_cb.channels())); |
| |
| EXPECT_NOERR(ap->set_stream_delay_ms(0)); |
| EXPECT_NOERR(ap->gain_control()->set_stream_analog_level(analog_level)); |
| |
| EXPECT_NOERR(ap->ProcessStream( |
| fwd_cb.channels(), |
| fwd_cb.num_frames(), |
| input_rate, |
| LayoutFromChannels(num_input_channels), |
| output_rate, |
| LayoutFromChannels(num_output_channels), |
| out_cb.channels())); |
| |
| // Dump forward output to file. |
| Interleave(out_cb.channels(), out_cb.num_frames(), out_cb.num_channels(), |
| float_data.get()); |
| size_t out_length = out_cb.num_channels() * out_cb.num_frames(); |
| |
| ASSERT_EQ(out_length, |
| fwrite(float_data.get(), sizeof(float_data[0]), |
| out_length, out_file)); |
| |
| // Dump reverse output to file. |
| Interleave(rev_out_cb.channels(), rev_out_cb.num_frames(), |
| rev_out_cb.num_channels(), float_data.get()); |
| size_t rev_out_length = |
| rev_out_cb.num_channels() * rev_out_cb.num_frames(); |
| |
| ASSERT_EQ(rev_out_length, |
| fwrite(float_data.get(), sizeof(float_data[0]), rev_out_length, |
| rev_out_file)); |
| |
| analog_level = ap->gain_control()->stream_analog_level(); |
| } |
| fclose(far_file); |
| fclose(near_file); |
| fclose(out_file); |
| fclose(rev_out_file); |
| } |
| |
| protected: |
| int input_rate_; |
| int output_rate_; |
| int reverse_input_rate_; |
| int reverse_output_rate_; |
| double expected_snr_; |
| double expected_reverse_snr_; |
| }; |
| |
| TEST_P(AudioProcessingTest, Formats) { |
| struct ChannelFormat { |
| int num_input; |
| int num_output; |
| int num_reverse_input; |
| int num_reverse_output; |
| }; |
| ChannelFormat cf[] = { |
| {1, 1, 1, 1}, |
| {1, 1, 2, 1}, |
| {2, 1, 1, 1}, |
| {2, 1, 2, 1}, |
| {2, 2, 1, 1}, |
| {2, 2, 2, 2}, |
| }; |
| |
| for (size_t i = 0; i < arraysize(cf); ++i) { |
| ProcessFormat(input_rate_, output_rate_, reverse_input_rate_, |
| reverse_output_rate_, cf[i].num_input, cf[i].num_output, |
| cf[i].num_reverse_input, cf[i].num_reverse_output, "out"); |
| |
| // Verify output for both directions. |
| std::vector<StreamDirection> stream_directions; |
| stream_directions.push_back(kForward); |
| stream_directions.push_back(kReverse); |
| for (StreamDirection file_direction : stream_directions) { |
| const int in_rate = file_direction ? reverse_input_rate_ : input_rate_; |
| const int out_rate = file_direction ? reverse_output_rate_ : output_rate_; |
| const int out_num = |
| file_direction ? cf[i].num_reverse_output : cf[i].num_output; |
| const double expected_snr = |
| file_direction ? expected_reverse_snr_ : expected_snr_; |
| |
| const int min_ref_rate = std::min(in_rate, out_rate); |
| int ref_rate; |
| |
| if (min_ref_rate > 32000) { |
| ref_rate = 48000; |
| } else if (min_ref_rate > 16000) { |
| ref_rate = 32000; |
| } else if (min_ref_rate > 8000) { |
| ref_rate = 16000; |
| } else { |
| ref_rate = 8000; |
| } |
| #ifdef WEBRTC_ARCH_ARM_FAMILY |
| if (file_direction == kForward) { |
| ref_rate = std::min(ref_rate, 32000); |
| } |
| #endif |
| FILE* out_file = fopen( |
| OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_, |
| reverse_output_rate_, cf[i].num_input, |
| cf[i].num_output, cf[i].num_reverse_input, |
| cf[i].num_reverse_output, file_direction).c_str(), |
| "rb"); |
| // The reference files always have matching input and output channels. |
| FILE* ref_file = fopen( |
| OutputFilePath("ref", ref_rate, ref_rate, ref_rate, ref_rate, |
| cf[i].num_output, cf[i].num_output, |
| cf[i].num_reverse_output, cf[i].num_reverse_output, |
| file_direction).c_str(), |
| "rb"); |
| ASSERT_TRUE(out_file != NULL); |
| ASSERT_TRUE(ref_file != NULL); |
| |
| const size_t ref_length = SamplesFromRate(ref_rate) * out_num; |
| const size_t out_length = SamplesFromRate(out_rate) * out_num; |
| // Data from the reference file. |
| std::unique_ptr<float[]> ref_data(new float[ref_length]); |
| // Data from the output file. |
| std::unique_ptr<float[]> out_data(new float[out_length]); |
| // Data from the resampled output, in case the reference and output rates |
| // don't match. |
| std::unique_ptr<float[]> cmp_data(new float[ref_length]); |
| |
| PushResampler<float> resampler; |
| resampler.InitializeIfNeeded(out_rate, ref_rate, out_num); |
| |
| // Compute the resampling delay of the output relative to the reference, |
| // to find the region over which we should search for the best SNR. |
| float expected_delay_sec = 0; |
| if (in_rate != ref_rate) { |
| // Input resampling delay. |
| expected_delay_sec += |
| PushSincResampler::AlgorithmicDelaySeconds(in_rate); |
| } |
| if (out_rate != ref_rate) { |
| // Output resampling delay. |
| expected_delay_sec += |
| PushSincResampler::AlgorithmicDelaySeconds(ref_rate); |
| // Delay of converting the output back to its processing rate for |
| // testing. |
| expected_delay_sec += |
| PushSincResampler::AlgorithmicDelaySeconds(out_rate); |
| } |
| int expected_delay = |
| floor(expected_delay_sec * ref_rate + 0.5f) * out_num; |
| |
| double variance = 0; |
| double sq_error = 0; |
| while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) && |
| fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) { |
| float* out_ptr = out_data.get(); |
| if (out_rate != ref_rate) { |
| // Resample the output back to its internal processing rate if |
| // necssary. |
| ASSERT_EQ(ref_length, |
| static_cast<size_t>(resampler.Resample( |
| out_ptr, out_length, cmp_data.get(), ref_length))); |
| out_ptr = cmp_data.get(); |
| } |
| |
| // Update the |sq_error| and |variance| accumulators with the highest |
| // SNR of reference vs output. |
| UpdateBestSNR(ref_data.get(), out_ptr, ref_length, expected_delay, |
| &variance, &sq_error); |
| } |
| |
| std::cout << "(" << input_rate_ << ", " << output_rate_ << ", " |
| << reverse_input_rate_ << ", " << reverse_output_rate_ << ", " |
| << cf[i].num_input << ", " << cf[i].num_output << ", " |
| << cf[i].num_reverse_input << ", " << cf[i].num_reverse_output |
| << ", " << file_direction << "): "; |
| if (sq_error > 0) { |
| double snr = 10 * log10(variance / sq_error); |
| EXPECT_GE(snr, expected_snr); |
| EXPECT_NE(0, expected_snr); |
| std::cout << "SNR=" << snr << " dB" << std::endl; |
| } else { |
| std::cout << "SNR=inf dB" << std::endl; |
| } |
| |
| fclose(out_file); |
| fclose(ref_file); |
| } |
| } |
| } |
| |
| #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) |
// Parameter sets for the float profile. Each tuple is read by the
// AudioProcessingTest constructor as (input rate, output rate, reverse input
// rate, reverse output rate, expected forward SNR, expected reverse SNR).
// The SNR values are compared in dB by the Formats test; an expected SNR of 0
// means the output must match the reference with zero error.
INSTANTIATE_TEST_SUITE_P(
    CommonFormats,
    AudioProcessingTest,
    testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 0, 0),
                    std::make_tuple(48000, 48000, 32000, 48000, 40, 30),
                    std::make_tuple(48000, 48000, 16000, 48000, 40, 20),
                    std::make_tuple(48000, 44100, 48000, 44100, 20, 20),
                    std::make_tuple(48000, 44100, 32000, 44100, 20, 15),
                    std::make_tuple(48000, 44100, 16000, 44100, 20, 15),
                    std::make_tuple(48000, 32000, 48000, 32000, 30, 35),
                    std::make_tuple(48000, 32000, 32000, 32000, 30, 0),
                    std::make_tuple(48000, 32000, 16000, 32000, 30, 20),
                    std::make_tuple(48000, 16000, 48000, 16000, 25, 20),
                    std::make_tuple(48000, 16000, 32000, 16000, 25, 20),
                    std::make_tuple(48000, 16000, 16000, 16000, 25, 0),

                    std::make_tuple(44100, 48000, 48000, 48000, 30, 0),
                    std::make_tuple(44100, 48000, 32000, 48000, 30, 30),
                    std::make_tuple(44100, 48000, 16000, 48000, 30, 20),
                    std::make_tuple(44100, 44100, 48000, 44100, 20, 20),
                    std::make_tuple(44100, 44100, 32000, 44100, 20, 15),
                    std::make_tuple(44100, 44100, 16000, 44100, 20, 15),
                    std::make_tuple(44100, 32000, 48000, 32000, 30, 35),
                    std::make_tuple(44100, 32000, 32000, 32000, 30, 0),
                    std::make_tuple(44100, 32000, 16000, 32000, 30, 20),
                    std::make_tuple(44100, 16000, 48000, 16000, 25, 20),
                    std::make_tuple(44100, 16000, 32000, 16000, 25, 20),
                    std::make_tuple(44100, 16000, 16000, 16000, 25, 0),

                    std::make_tuple(32000, 48000, 48000, 48000, 30, 0),
                    std::make_tuple(32000, 48000, 32000, 48000, 35, 30),
                    std::make_tuple(32000, 48000, 16000, 48000, 30, 20),
                    std::make_tuple(32000, 44100, 48000, 44100, 20, 20),
                    std::make_tuple(32000, 44100, 32000, 44100, 20, 15),
                    std::make_tuple(32000, 44100, 16000, 44100, 20, 15),
                    std::make_tuple(32000, 32000, 48000, 32000, 40, 35),
                    std::make_tuple(32000, 32000, 32000, 32000, 0, 0),
                    std::make_tuple(32000, 32000, 16000, 32000, 40, 20),
                    std::make_tuple(32000, 16000, 48000, 16000, 25, 20),
                    std::make_tuple(32000, 16000, 32000, 16000, 25, 20),
                    std::make_tuple(32000, 16000, 16000, 16000, 25, 0),

                    std::make_tuple(16000, 48000, 48000, 48000, 25, 0),
                    std::make_tuple(16000, 48000, 32000, 48000, 25, 30),
                    std::make_tuple(16000, 48000, 16000, 48000, 25, 20),
                    std::make_tuple(16000, 44100, 48000, 44100, 15, 20),
                    std::make_tuple(16000, 44100, 32000, 44100, 15, 15),
                    std::make_tuple(16000, 44100, 16000, 44100, 15, 15),
                    std::make_tuple(16000, 32000, 48000, 32000, 25, 35),
                    std::make_tuple(16000, 32000, 32000, 32000, 25, 0),
                    std::make_tuple(16000, 32000, 16000, 32000, 25, 20),
                    std::make_tuple(16000, 16000, 48000, 16000, 40, 20),
                    std::make_tuple(16000, 16000, 32000, 16000, 40, 20),
                    std::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
| |
| #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) |
// Parameter sets for the fixed-point profile. Each tuple is read by the
// AudioProcessingTest constructor as (input rate, output rate, reverse input
// rate, reverse output rate, expected forward SNR, expected reverse SNR).
// The SNR values are compared in dB by the Formats test; an expected SNR of 0
// means the output must match the reference with zero error.
INSTANTIATE_TEST_SUITE_P(
    CommonFormats,
    AudioProcessingTest,
    testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 20, 0),
                    std::make_tuple(48000, 48000, 32000, 48000, 20, 30),
                    std::make_tuple(48000, 48000, 16000, 48000, 20, 20),
                    std::make_tuple(48000, 44100, 48000, 44100, 15, 20),
                    std::make_tuple(48000, 44100, 32000, 44100, 15, 15),
                    std::make_tuple(48000, 44100, 16000, 44100, 15, 15),
                    std::make_tuple(48000, 32000, 48000, 32000, 20, 35),
                    std::make_tuple(48000, 32000, 32000, 32000, 20, 0),
                    std::make_tuple(48000, 32000, 16000, 32000, 20, 20),
                    std::make_tuple(48000, 16000, 48000, 16000, 20, 20),
                    std::make_tuple(48000, 16000, 32000, 16000, 20, 20),
                    std::make_tuple(48000, 16000, 16000, 16000, 20, 0),

                    std::make_tuple(44100, 48000, 48000, 48000, 15, 0),
                    std::make_tuple(44100, 48000, 32000, 48000, 15, 30),
                    std::make_tuple(44100, 48000, 16000, 48000, 15, 20),
                    std::make_tuple(44100, 44100, 48000, 44100, 15, 20),
                    std::make_tuple(44100, 44100, 32000, 44100, 15, 15),
                    std::make_tuple(44100, 44100, 16000, 44100, 15, 15),
                    std::make_tuple(44100, 32000, 48000, 32000, 20, 35),
                    std::make_tuple(44100, 32000, 32000, 32000, 20, 0),
                    std::make_tuple(44100, 32000, 16000, 32000, 20, 20),
                    std::make_tuple(44100, 16000, 48000, 16000, 20, 20),
                    std::make_tuple(44100, 16000, 32000, 16000, 20, 20),
                    std::make_tuple(44100, 16000, 16000, 16000, 20, 0),

                    std::make_tuple(32000, 48000, 48000, 48000, 35, 0),
                    std::make_tuple(32000, 48000, 32000, 48000, 65, 30),
                    std::make_tuple(32000, 48000, 16000, 48000, 40, 20),
                    std::make_tuple(32000, 44100, 48000, 44100, 20, 20),
                    std::make_tuple(32000, 44100, 32000, 44100, 20, 15),
                    std::make_tuple(32000, 44100, 16000, 44100, 20, 15),
                    std::make_tuple(32000, 32000, 48000, 32000, 35, 35),
                    std::make_tuple(32000, 32000, 32000, 32000, 0, 0),
                    std::make_tuple(32000, 32000, 16000, 32000, 40, 20),
                    std::make_tuple(32000, 16000, 48000, 16000, 20, 20),
                    std::make_tuple(32000, 16000, 32000, 16000, 20, 20),
                    std::make_tuple(32000, 16000, 16000, 16000, 20, 0),

                    std::make_tuple(16000, 48000, 48000, 48000, 25, 0),
                    std::make_tuple(16000, 48000, 32000, 48000, 25, 30),
                    std::make_tuple(16000, 48000, 16000, 48000, 25, 20),
                    std::make_tuple(16000, 44100, 48000, 44100, 15, 20),
                    std::make_tuple(16000, 44100, 32000, 44100, 15, 15),
                    std::make_tuple(16000, 44100, 16000, 44100, 15, 15),
                    std::make_tuple(16000, 32000, 48000, 32000, 25, 35),
                    std::make_tuple(16000, 32000, 32000, 32000, 25, 0),
                    std::make_tuple(16000, 32000, 16000, 32000, 25, 20),
                    std::make_tuple(16000, 16000, 48000, 16000, 35, 20),
                    std::make_tuple(16000, 16000, 32000, 16000, 35, 20),
                    std::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
| #endif |
| |
| } // namespace |
| |
| TEST(RuntimeSettingTest, TestDefaultCtor) { |
| auto s = AudioProcessing::RuntimeSetting(); |
| EXPECT_EQ(AudioProcessing::RuntimeSetting::Type::kNotSpecified, s.type()); |
| } |
| |
| TEST(RuntimeSettingTest, TestCapturePreGain) { |
| using Type = AudioProcessing::RuntimeSetting::Type; |
| { |
| auto s = AudioProcessing::RuntimeSetting::CreateCapturePreGain(1.25f); |
| EXPECT_EQ(Type::kCapturePreGain, s.type()); |
| float v; |
| s.GetFloat(&v); |
| EXPECT_EQ(1.25f, v); |
| } |
| |
| #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) |
| EXPECT_DEATH(AudioProcessing::RuntimeSetting::CreateCapturePreGain(0.1f), ""); |
| #endif |
| } |
| |
| TEST(RuntimeSettingTest, TestUsageWithSwapQueue) { |
| SwapQueue<AudioProcessing::RuntimeSetting> q(1); |
| auto s = AudioProcessing::RuntimeSetting(); |
| ASSERT_TRUE(q.Insert(&s)); |
| ASSERT_TRUE(q.Remove(&s)); |
| EXPECT_EQ(AudioProcessing::RuntimeSetting::Type::kNotSpecified, s.type()); |
| } |
| |
| TEST(ApmConfiguration, EnablePostProcessing) { |
| // Verify that apm uses a capture post processing module if one is provided. |
| auto mock_post_processor_ptr = |
| new testing::NiceMock<test::MockCustomProcessing>(); |
| auto mock_post_processor = |
| std::unique_ptr<CustomProcessing>(mock_post_processor_ptr); |
| rtc::scoped_refptr<AudioProcessing> apm = |
| AudioProcessingBuilder() |
| .SetCapturePostProcessing(std::move(mock_post_processor)) |
| .Create(); |
| |
| AudioFrame audio; |
| audio.num_channels_ = 1; |
| SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); |
| |
| EXPECT_CALL(*mock_post_processor_ptr, Process(testing::_)).Times(1); |
| apm->ProcessStream(&audio); |
| } |
| |
| TEST(ApmConfiguration, EnablePreProcessing) { |
| // Verify that apm uses a capture post processing module if one is provided. |
| auto mock_pre_processor_ptr = |
| new testing::NiceMock<test::MockCustomProcessing>(); |
| auto mock_pre_processor = |
| std::unique_ptr<CustomProcessing>(mock_pre_processor_ptr); |
| rtc::scoped_refptr<AudioProcessing> apm = |
| AudioProcessingBuilder() |
| .SetRenderPreProcessing(std::move(mock_pre_processor)) |
| .Create(); |
| |
| AudioFrame audio; |
| audio.num_channels_ = 1; |
| SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); |
| |
| EXPECT_CALL(*mock_pre_processor_ptr, Process(testing::_)).Times(1); |
| apm->ProcessReverseStream(&audio); |
| } |
| |
| TEST(ApmConfiguration, EnableCaptureAnalyzer) { |
| // Verify that apm uses a capture analyzer if one is provided. |
| auto mock_capture_analyzer_ptr = |
| new testing::NiceMock<test::MockCustomAudioAnalyzer>(); |
| auto mock_capture_analyzer = |
| std::unique_ptr<CustomAudioAnalyzer>(mock_capture_analyzer_ptr); |
| rtc::scoped_refptr<AudioProcessing> apm = |
| AudioProcessingBuilder() |
| .SetCaptureAnalyzer(std::move(mock_capture_analyzer)) |
| .Create(); |
| |
| AudioFrame audio; |
| audio.num_channels_ = 1; |
| SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); |
| |
| EXPECT_CALL(*mock_capture_analyzer_ptr, Analyze(testing::_)).Times(1); |
| apm->ProcessStream(&audio); |
| } |
| |
| TEST(ApmConfiguration, PreProcessingReceivesRuntimeSettings) { |
| auto mock_pre_processor_ptr = |
| new testing::NiceMock<test::MockCustomProcessing>(); |
| auto mock_pre_processor = |
| std::unique_ptr<CustomProcessing>(mock_pre_processor_ptr); |
| rtc::scoped_refptr<AudioProcessing> apm = |
| AudioProcessingBuilder() |
| .SetRenderPreProcessing(std::move(mock_pre_processor)) |
| .Create(); |
| apm->SetRuntimeSetting( |
| AudioProcessing::RuntimeSetting::CreateCustomRenderSetting(0)); |
| |
| // RuntimeSettings forwarded during 'Process*Stream' calls. |
| // Therefore we have to make one such call. |
| AudioFrame audio; |
| audio.num_channels_ = 1; |
| SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); |
| |
| EXPECT_CALL(*mock_pre_processor_ptr, SetRuntimeSetting(testing::_)).Times(1); |
| apm->ProcessReverseStream(&audio); |
| } |
| |
| class MyEchoControlFactory : public EchoControlFactory { |
| public: |
| std::unique_ptr<EchoControl> Create(int sample_rate_hz) { |
| auto ec = new test::MockEchoControl(); |
| EXPECT_CALL(*ec, AnalyzeRender(testing::_)).Times(1); |
| EXPECT_CALL(*ec, AnalyzeCapture(testing::_)).Times(2); |
| EXPECT_CALL(*ec, ProcessCapture(testing::_, testing::_)).Times(2); |
| return std::unique_ptr<EchoControl>(ec); |
| } |
| }; |
| |
| TEST(ApmConfiguration, EchoControlInjection) { |
| // Verify that apm uses an injected echo controller if one is provided. |
| webrtc::Config webrtc_config; |
| std::unique_ptr<EchoControlFactory> echo_control_factory( |
| new MyEchoControlFactory()); |
| |
| rtc::scoped_refptr<AudioProcessing> apm = |
| AudioProcessingBuilder() |
| .SetEchoControlFactory(std::move(echo_control_factory)) |
| .Create(webrtc_config); |
| |
| AudioFrame audio; |
| audio.num_channels_ = 1; |
| SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); |
| apm->ProcessStream(&audio); |
| apm->ProcessReverseStream(&audio); |
| apm->ProcessStream(&audio); |
| } |
| |
| std::unique_ptr<AudioProcessing> CreateApm(bool use_AEC2) { |
| Config old_config; |
| if (use_AEC2) { |
| old_config.Set<ExtendedFilter>(new ExtendedFilter(true)); |
| old_config.Set<DelayAgnostic>(new DelayAgnostic(true)); |
| } |
| std::unique_ptr<AudioProcessing> apm( |
| AudioProcessingBuilder().Create(old_config)); |
| if (!apm) { |
| return apm; |
| } |
| |
| ProcessingConfig processing_config = { |
| {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}}; |
| |
| if (apm->Initialize(processing_config) != 0) { |
| return nullptr; |
| } |
| |
| // Disable all components except for an AEC and the residual echo detector. |
| AudioProcessing::Config apm_config; |
| apm_config.residual_echo_detector.enabled = true; |
| apm_config.high_pass_filter.enabled = false; |
| apm_config.gain_controller2.enabled = false; |
| apm_config.echo_canceller.enabled = true; |
| apm_config.echo_canceller.mobile_mode = !use_AEC2; |
| apm->ApplyConfig(apm_config); |
| EXPECT_EQ(apm->gain_control()->Enable(false), 0); |
| EXPECT_EQ(apm->level_estimator()->Enable(false), 0); |
| EXPECT_EQ(apm->noise_suppression()->Enable(false), 0); |
| EXPECT_EQ(apm->voice_detection()->Enable(false), 0); |
| return apm; |
| } |
| |
// Test-suite name for the echo-statistics tests below. On Android, iOS and Mac
// builds it expands to a DISABLED_-prefixed name, so those tests are compiled
// but not run by default there.
#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS) && !defined(WEBRTC_MAC)
#define MAYBE_ApmStatistics ApmStatistics
#else
#define MAYBE_ApmStatistics DISABLED_ApmStatistics
#endif
| |
| TEST(MAYBE_ApmStatistics, AEC2EnabledTest) { |
| // Set up APM with AEC2 and process some audio. |
| std::unique_ptr<AudioProcessing> apm = CreateApm(true); |
| ASSERT_TRUE(apm); |
| |
| // Set up an audioframe. |
| AudioFrame frame; |
| frame.num_channels_ = 1; |
| SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); |
| |
| // Fill the audio frame with a sawtooth pattern. |
| int16_t* ptr = frame.mutable_data(); |
| for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { |
| ptr[i] = 10000 * ((i % 3) - 1); |
| } |
| |
| // Do some processing. |
| for (int i = 0; i < 200; i++) { |
| EXPECT_EQ(apm->ProcessReverseStream(&frame), 0); |
| EXPECT_EQ(apm->set_stream_delay_ms(0), 0); |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| } |
| |
| // Test statistics interface. |
| AudioProcessingStats stats = apm->GetStatistics(true); |
| // We expect all statistics to be set and have a sensible value. |
| ASSERT_TRUE(stats.residual_echo_likelihood); |
| EXPECT_GE(*stats.residual_echo_likelihood, 0.0); |
| EXPECT_LE(*stats.residual_echo_likelihood, 1.0); |
| ASSERT_TRUE(stats.residual_echo_likelihood_recent_max); |
| EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); |
| EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); |
| ASSERT_TRUE(stats.echo_return_loss); |
| EXPECT_NE(*stats.echo_return_loss, -100.0); |
| ASSERT_TRUE(stats.echo_return_loss_enhancement); |
| EXPECT_NE(*stats.echo_return_loss_enhancement, -100.0); |
| ASSERT_TRUE(stats.divergent_filter_fraction); |
| EXPECT_NE(*stats.divergent_filter_fraction, -1.0); |
| ASSERT_TRUE(stats.delay_standard_deviation_ms); |
| EXPECT_GE(*stats.delay_standard_deviation_ms, 0); |
| // We don't check stats.delay_median_ms since it takes too long to settle to a |
| // value. At least 20 seconds of data need to be processed before it will get |
| // a value, which would make this test take too much time. |
| |
| // If there are no receive streams, we expect the stats not to be set. The |
| // 'false' argument signals to APM that no receive streams are currently |
| // active. In that situation the statistics would get stuck at their last |
| // calculated value (AEC and echo detection need at least one stream in each |
| // direction), so to avoid that, they should not be set by APM. |
| stats = apm->GetStatistics(false); |
| EXPECT_FALSE(stats.residual_echo_likelihood); |
| EXPECT_FALSE(stats.residual_echo_likelihood_recent_max); |
| EXPECT_FALSE(stats.echo_return_loss); |
| EXPECT_FALSE(stats.echo_return_loss_enhancement); |
| EXPECT_FALSE(stats.divergent_filter_fraction); |
| EXPECT_FALSE(stats.delay_median_ms); |
| EXPECT_FALSE(stats.delay_standard_deviation_ms); |
| } |
| |
| TEST(MAYBE_ApmStatistics, AECMEnabledTest) { |
| // Set up APM with AECM and process some audio. |
| std::unique_ptr<AudioProcessing> apm = CreateApm(false); |
| ASSERT_TRUE(apm); |
| |
| // Set up an audioframe. |
| AudioFrame frame; |
| frame.num_channels_ = 1; |
| SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); |
| |
| // Fill the audio frame with a sawtooth pattern. |
| int16_t* ptr = frame.mutable_data(); |
| for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { |
| ptr[i] = 10000 * ((i % 3) - 1); |
| } |
| |
| // Do some processing. |
| for (int i = 0; i < 200; i++) { |
| EXPECT_EQ(apm->ProcessReverseStream(&frame), 0); |
| EXPECT_EQ(apm->set_stream_delay_ms(0), 0); |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| } |
| |
| // Test statistics interface. |
| AudioProcessingStats stats = apm->GetStatistics(true); |
| // We expect only the residual echo detector statistics to be set and have a |
| // sensible value. |
| EXPECT_TRUE(stats.residual_echo_likelihood); |
| if (stats.residual_echo_likelihood) { |
| EXPECT_GE(*stats.residual_echo_likelihood, 0.0); |
| EXPECT_LE(*stats.residual_echo_likelihood, 1.0); |
| } |
| EXPECT_TRUE(stats.residual_echo_likelihood_recent_max); |
| if (stats.residual_echo_likelihood_recent_max) { |
| EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); |
| EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); |
| } |
| EXPECT_FALSE(stats.echo_return_loss); |
| EXPECT_FALSE(stats.echo_return_loss_enhancement); |
| EXPECT_FALSE(stats.divergent_filter_fraction); |
| EXPECT_FALSE(stats.delay_median_ms); |
| EXPECT_FALSE(stats.delay_standard_deviation_ms); |
| |
| // If there are no receive streams, we expect the stats not to be set. |
| stats = apm->GetStatistics(false); |
| EXPECT_FALSE(stats.residual_echo_likelihood); |
| EXPECT_FALSE(stats.residual_echo_likelihood_recent_max); |
| EXPECT_FALSE(stats.echo_return_loss); |
| EXPECT_FALSE(stats.echo_return_loss_enhancement); |
| EXPECT_FALSE(stats.divergent_filter_fraction); |
| EXPECT_FALSE(stats.delay_median_ms); |
| EXPECT_FALSE(stats.delay_standard_deviation_ms); |
| } |
| |
| TEST(ApmStatistics, ReportOutputRmsDbfs) { |
| ProcessingConfig processing_config = { |
| {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}}; |
| AudioProcessing::Config config; |
| |
| // Set up an audioframe. |
| AudioFrame frame; |
| frame.num_channels_ = 1; |
| SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); |
| |
| // Fill the audio frame with a sawtooth pattern. |
| int16_t* ptr = frame.mutable_data(); |
| for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { |
| ptr[i] = 10000 * ((i % 3) - 1); |
| } |
| |
| std::unique_ptr<AudioProcessing> apm(AudioProcessingBuilder().Create()); |
| apm->Initialize(processing_config); |
| |
| // If not enabled, no metric should be reported. |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| EXPECT_FALSE(apm->GetStatistics(false).output_rms_dbfs); |
| |
| // If enabled, metrics should be reported. |
| config.level_estimation.enabled = true; |
| apm->ApplyConfig(config); |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| auto stats = apm->GetStatistics(false); |
| EXPECT_TRUE(stats.output_rms_dbfs); |
| EXPECT_GE(*stats.output_rms_dbfs, 0); |
| |
| // If re-disabled, the value is again not reported. |
| config.level_estimation.enabled = false; |
| apm->ApplyConfig(config); |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| EXPECT_FALSE(apm->GetStatistics(false).output_rms_dbfs); |
| } |
| |
| TEST(ApmStatistics, ReportHasVoice) { |
| ProcessingConfig processing_config = { |
| {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}}; |
| AudioProcessing::Config config; |
| |
| // Set up an audioframe. |
| AudioFrame frame; |
| frame.num_channels_ = 1; |
| SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); |
| |
| // Fill the audio frame with a sawtooth pattern. |
| int16_t* ptr = frame.mutable_data(); |
| for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { |
| ptr[i] = 10000 * ((i % 3) - 1); |
| } |
| |
| std::unique_ptr<AudioProcessing> apm(AudioProcessingBuilder().Create()); |
| apm->Initialize(processing_config); |
| |
| // If not enabled, no metric should be reported. |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| EXPECT_FALSE(apm->GetStatistics(false).voice_detected); |
| |
| // If enabled, metrics should be reported. |
| config.voice_detection.enabled = true; |
| apm->ApplyConfig(config); |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| auto stats = apm->GetStatistics(false); |
| EXPECT_TRUE(stats.voice_detected); |
| |
| // If re-disabled, the value is again not reported. |
| config.voice_detection.enabled = false; |
| apm->ApplyConfig(config); |
| EXPECT_EQ(apm->ProcessStream(&frame), 0); |
| EXPECT_FALSE(apm->GetStatistics(false).voice_detected); |
| } |
| } // namespace webrtc |