modules/audio_processing/agc2/vad_with_level_unittest.cc - src - Git at Google

 /*
  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "modules/audio_processing/agc2/vad_with_level.h"

 #include <memory>
 #include <vector>

 #include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
 #include "rtc_base/gunit.h"
 #include "rtc_base/numerics/safe_compare.h"
 #include "test/gmock.h"

 namespace webrtc {
 namespace {

 using ::testing::AnyNumber;
 using ::testing::ReturnRoundRobin;

 constexpr float kInstantAttack = 1.f;
 constexpr float kSlowAttack = 0.1f;

 constexpr int kSampleRateHz = 8000;

 class MockVad : public VadLevelAnalyzer::VoiceActivityDetector {
  public:
   MOCK_METHOD(float,
               ComputeProbability,
               (AudioFrameView<const float> frame),
               (override));
 };

 // Creates a `VadLevelAnalyzer` injecting a mock VAD which repeatedly returns
 // the next value from `speech_probabilities` until it reaches the end and will
 // restart from the beginning.
 std::unique_ptr<VadLevelAnalyzer> CreateVadLevelAnalyzerWithMockVad(
     float vad_probability_attack,
     const std::vector<float>& speech_probabilities) {
   auto vad = std::make_unique<MockVad>();
   EXPECT_CALL(*vad, ComputeProbability)
       .Times(AnyNumber())
       .WillRepeatedly(ReturnRoundRobin(speech_probabilities));
   return std::make_unique<VadLevelAnalyzer>(vad_probability_attack,
                                             std::move(vad));
 }

 // 10 ms mono frame.
 struct FrameWithView {
   // Ctor. Initializes the frame samples with `value`.
   FrameWithView(float value = 0.f)
       : channel0(samples.data()),
         view(&channel0, /*num_channels=*/1, samples.size()) {
     samples.fill(value);
   }
   std::array<float, kSampleRateHz / 100> samples;
   const float* const channel0;
   const AudioFrameView<const float> view;
 };

 TEST(AutomaticGainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
   // Handcrafted frame so that the average is lower than the peak value.
   FrameWithView frame(1000.f);  // Constant frame.
   frame.samples[10] = 2000.f;   // Except for one peak value.

   // Compute audio frame levels (the VAD result is ignored).
   VadLevelAnalyzer analyzer;
   auto levels_and_vad_prob = analyzer.AnalyzeFrame(frame.view);

   // Compare peak and RMS levels.
   EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs);
 }

 // Checks that the unprocessed and the smoothed speech probabilities match when
 // instant attack is used.
 TEST(AutomaticGainController2VadLevelAnalyzer, NoSpeechProbabilitySmoothing) {
   const std::vector<float> speech_probabilities{0.709f, 0.484f, 0.882f, 0.167f,
                                                 0.44f,  0.525f, 0.858f, 0.314f,
                                                 0.653f, 0.965f, 0.413f, 0.f};
   auto analyzer =
       CreateVadLevelAnalyzerWithMockVad(kInstantAttack, speech_probabilities);
   FrameWithView frame;
   for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
     SCOPED_TRACE(i);
     EXPECT_EQ(speech_probabilities[i],
               analyzer->AnalyzeFrame(frame.view).speech_probability);
   }
 }

 // Checks that the smoothed speech probability does not instantly converge to
 // the unprocessed one when slow attack is used.
 TEST(AutomaticGainController2VadLevelAnalyzer,
      SlowAttackSpeechProbabilitySmoothing) {
   const std::vector<float> speech_probabilities{0.f, 0.f, 1.f, 1.f, 1.f, 1.f};
   auto analyzer =
       CreateVadLevelAnalyzerWithMockVad(kSlowAttack, speech_probabilities);
   FrameWithView frame;
   float prev_probability = 0.f;
   for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
     SCOPED_TRACE(i);
     const float smoothed_probability =
         analyzer->AnalyzeFrame(frame.view).speech_probability;
     EXPECT_LT(smoothed_probability, 1.f);  // Not enough time to reach 1.
     EXPECT_LE(prev_probability, smoothed_probability);  // Converge towards 1.
     prev_probability = smoothed_probability;
   }
 }

 // Checks that the smoothed speech probability instantly decays to the
 // unprocessed one when slow attack is used.
 TEST(AutomaticGainController2VadLevelAnalyzer, SpeechProbabilityInstantDecay) {
   const std::vector<float> speech_probabilities{1.f, 1.f, 1.f, 1.f, 1.f, 0.f};
   auto analyzer =
       CreateVadLevelAnalyzerWithMockVad(kSlowAttack, speech_probabilities);
   FrameWithView frame;
   for (int i = 0; rtc::SafeLt(i, speech_probabilities.size() - 1); ++i) {
     analyzer->AnalyzeFrame(frame.view);
   }
   EXPECT_EQ(0.f, analyzer->AnalyzeFrame(frame.view).speech_probability);
 }

 }  // namespace
 }  // namespace webrtc
	/*
	* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include "modules/audio_processing/agc2/vad_with_level.h"

	#include <memory>
	#include <vector>

	#include "modules/audio_processing/agc2/agc2_common.h"
	#include "modules/audio_processing/include/audio_frame_view.h"
	#include "rtc_base/gunit.h"
	#include "rtc_base/numerics/safe_compare.h"
	#include "test/gmock.h"

	namespace webrtc {
	namespace {

	using ::testing::AnyNumber;
	using ::testing::ReturnRoundRobin;

	constexpr float kInstantAttack = 1.f;
	constexpr float kSlowAttack = 0.1f;

	constexpr int kSampleRateHz = 8000;

	class MockVad : public VadLevelAnalyzer::VoiceActivityDetector {
	public:
	MOCK_METHOD(float,
	ComputeProbability,
	(AudioFrameView<const float> frame),
	(override));
	};

	// Creates a `VadLevelAnalyzer` injecting a mock VAD which repeatedly returns
	// the next value from `speech_probabilities` until it reaches the end and will
	// restart from the beginning.
	std::unique_ptr<VadLevelAnalyzer> CreateVadLevelAnalyzerWithMockVad(
	float vad_probability_attack,
	const std::vector<float>& speech_probabilities) {
	auto vad = std::make_unique<MockVad>();
	EXPECT_CALL(*vad, ComputeProbability)
	.Times(AnyNumber())
	.WillRepeatedly(ReturnRoundRobin(speech_probabilities));
	return std::make_unique<VadLevelAnalyzer>(vad_probability_attack,
	std::move(vad));
	}

	// 10 ms mono frame.
	struct FrameWithView {
	// Ctor. Initializes the frame samples with `value`.
	FrameWithView(float value = 0.f)
	: channel0(samples.data()),
	view(&channel0, /num_channels=/1, samples.size()) {
	samples.fill(value);
	}
	std::array<float, kSampleRateHz / 100> samples;
	const float* const channel0;
	const AudioFrameView<const float> view;
	};

	TEST(AutomaticGainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
	// Handcrafted frame so that the average is lower than the peak value.
	FrameWithView frame(1000.f); // Constant frame.
	frame.samples[10] = 2000.f; // Except for one peak value.

	// Compute audio frame levels (the VAD result is ignored).
	VadLevelAnalyzer analyzer;
	auto levels_and_vad_prob = analyzer.AnalyzeFrame(frame.view);

	// Compare peak and RMS levels.
	EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs);
	}

	// Checks that the unprocessed and the smoothed speech probabilities match when
	// instant attack is used.
	TEST(AutomaticGainController2VadLevelAnalyzer, NoSpeechProbabilitySmoothing) {
	const std::vector<float> speech_probabilities{0.709f, 0.484f, 0.882f, 0.167f,
	0.44f, 0.525f, 0.858f, 0.314f,
	0.653f, 0.965f, 0.413f, 0.f};
	auto analyzer =
	CreateVadLevelAnalyzerWithMockVad(kInstantAttack, speech_probabilities);
	FrameWithView frame;
	for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
	SCOPED_TRACE(i);
	EXPECT_EQ(speech_probabilities[i],
	analyzer->AnalyzeFrame(frame.view).speech_probability);
	}
	}

	// Checks that the smoothed speech probability does not instantly converge to
	// the unprocessed one when slow attack is used.
	TEST(AutomaticGainController2VadLevelAnalyzer,
	SlowAttackSpeechProbabilitySmoothing) {
	const std::vector<float> speech_probabilities{0.f, 0.f, 1.f, 1.f, 1.f, 1.f};
	auto analyzer =
	CreateVadLevelAnalyzerWithMockVad(kSlowAttack, speech_probabilities);
	FrameWithView frame;
	float prev_probability = 0.f;
	for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
	SCOPED_TRACE(i);
	const float smoothed_probability =
	analyzer->AnalyzeFrame(frame.view).speech_probability;
	EXPECT_LT(smoothed_probability, 1.f); // Not enough time to reach 1.
	EXPECT_LE(prev_probability, smoothed_probability); // Converge towards 1.
	prev_probability = smoothed_probability;
	}
	}

	// Checks that the smoothed speech probability instantly decays to the
	// unprocessed one when slow attack is used.
	TEST(AutomaticGainController2VadLevelAnalyzer, SpeechProbabilityInstantDecay) {
	const std::vector<float> speech_probabilities{1.f, 1.f, 1.f, 1.f, 1.f, 0.f};
	auto analyzer =
	CreateVadLevelAnalyzerWithMockVad(kSlowAttack, speech_probabilities);
	FrameWithView frame;
	for (int i = 0; rtc::SafeLt(i, speech_probabilities.size() - 1); ++i) {
	analyzer->AnalyzeFrame(frame.view);
	}
	EXPECT_EQ(0.f, analyzer->AnalyzeFrame(frame.view).speech_probability);
	}

	} // namespace
	} // namespace webrtc