// blob: f4a23a92b95ead4fd08e6a29ac9c76b4334e33c5
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"

#include <algorithm>
#include <cmath>
#include <memory>

#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/vector_float_frame.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"
namespace webrtc {
namespace {
// Channel counts used when initializing the gain applier and the test frames.
constexpr int kMono = 1;
constexpr int kStereo = 2;
// Number of samples per channel in a 10 ms frame at 8 kHz and at 48 kHz.
constexpr int kFrameLen10ms8kHz = 80;
constexpr int kFrameLen10ms48kHz = 480;
// Speech probability stored in `kFrameInfo`.
constexpr float kMaxSpeechProbability = 1.0f;
// Constants used in place of estimated noise levels.
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
constexpr float kWithNoiseDbfs = -20.f;
// Gain adaptation speed handed to the `AdaptiveDigitalGainApplier`
// constructor, and the same speed expressed per frame. `kFrameDurationMs` is
// declared outside this file.
constexpr float kMaxGainChangePerSecondDb = 3.0f;
constexpr float kMaxGainChangePerFrameDb =
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.0f;
// Output noise level limit handed to the `AdaptiveDigitalGainApplier`
// constructor. `NoiseLimitsGain` asserts that `kWithNoiseDbfs` is above it.
constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
// Bundles an `AdaptiveDigitalGainApplier` (created with dry run disabled)
// with the `ApmDataDumper` it points to, so that both share one lifetime.
struct GainApplierHelper {
  // Uses an adjacent speech frames threshold of 1.
  GainApplierHelper()
      : GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}

  // Creates the gain applier with the given threshold and with the
  // file-level `kMaxGainChangePerSecondDb` and `kMaxOutputNoiseLevelDbfs`.
  explicit GainApplierHelper(int adjacent_speech_frames_threshold)
      : gain_applier(std::make_unique<AdaptiveDigitalGainApplier>(
            &apm_data_dumper,
            adjacent_speech_frames_threshold,
            kMaxGainChangePerSecondDb,
            kMaxOutputNoiseLevelDbfs,
            /*dry_run=*/false)) {}

  // Declared before `gain_applier`, which receives a pointer to it.
  ApmDataDumper apm_data_dumper{0};
  std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
};
// `kFrameInfo` below is a namespace-scope constant; presumably this check
// guards against `FrameInfo` gaining a non-trivial destructor.
static_assert(std::is_trivially_destructible<
AdaptiveDigitalGainApplier::FrameInfo>::value,
"");
// Baseline frame info that the tests copy and then tweak: voice on, no noise,
// low limiter, confident level.
constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
/*speech_probability=*/kMaxSpeechProbability,
/*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
/*speech_level_reliable=*/true,
/*noise_rms_dbfs=*/kNoNoiseDbfs,
/*headroom_db=*/kSaturationProtectorInitialHeadroomDb,
/*limiter_envelope_dbfs=*/-2.0f};
// Checks that processing one stereo frame with plausible settings does not
// crash. No expectation is placed on the output samples.
TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
  GainApplierHelper helper;
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
  // Make one call with reasonable audio level values and settings.
  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.speech_level_dbfs = -5.0f;
  // Pass the customized `info` rather than `kFrameInfo`; otherwise the speech
  // level set above is never seen by the gain applier.
  helper.gain_applier->Process(info, fake_audio.float_frame_view());
}
// Checks that the maximum allowed gain is applied.
TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
  // Number of frames needed to cover `kMaxGainDb` at the maximum adaptation
  // speed, plus a margin of 10 frames.
  constexpr int kNumFramesToAdapt =
      static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;

  GainApplierHelper helper;
  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);

  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.speech_level_dbfs = -60.0f;

  // Initialized so that the value is well defined even if no frame were
  // processed; in that case log10(0) makes the expectation below fail.
  float applied_gain = 0.0f;
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    // The frame is filled with 1.0, so the first output sample is read back
    // as the linear gain applied to the frame.
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
    helper.gain_applier->Process(info, fake_audio.float_frame_view());
    applied_gain = fake_audio.float_frame_view().channel(0)[0];
  }
  // `std::log10` has a float overload in <cmath>; `std::log10f` is not
  // declared in namespace `std` by every standard library.
  const float applied_gain_db = 20.0f * std::log10(applied_gain);
  EXPECT_NEAR(applied_gain_db, kMaxGainDb, 0.1f);
}
// Checks that the first output sample of consecutive unit-amplitude frames
// changes by at most `DbToRatio(kMaxGainChangePerFrameDb)`, first with a
// -25 dBFS speech level and then with a 0 dBFS speech level.
// NOTE(review): `initial_level_dbfs` is negative, so with 10 ms frames
// (assuming `kFrameDurationMs` is 10) `kNumFramesToAdapt` evaluates to a
// negative number and no frame is processed. Confirm and fix the frame count;
// the initial value of the tracked gain may need revisiting as well.
TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  GainApplierHelper helper;
  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  // A few extra frames for safety.
  constexpr int kNumFramesToAdapt =
      static_cast<int>(initial_level_dbfs / kMaxGainChangePerFrameDb) + 10;
  const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);

  // Gain observed on the previous frame; carried over between the two runs.
  float previous_gain_linear = 1.0f;

  // Processes `kNumFramesToAdapt` frames at the given speech level and checks
  // the frame-to-frame change of the first output sample.
  const auto process_and_check = [&](float speech_level_dbfs) {
    for (int frame = 0; frame < kNumFramesToAdapt; ++frame) {
      SCOPED_TRACE(frame);
      VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
      AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
      info.speech_level_dbfs = speech_level_dbfs;
      helper.gain_applier->Process(info, fake_audio.float_frame_view());
      const float gain_linear = fake_audio.float_frame_view().channel(0)[0];
      EXPECT_LE(std::abs(gain_linear - previous_gain_linear),
                kMaxChangePerFrameLinear);
      previous_gain_linear = gain_linear;
    }
  };

  process_and_check(initial_level_dbfs);
  // Check that the same is true when gain decreases as well.
  process_and_check(0.0f);
}
// Checks that, within one processed frame, neighboring output samples differ
// by no more than `DbToRatio(kMaxGainChangePerFrameDb)` divided by the frame
// length.
TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
  GainApplierHelper helper;
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.speech_level_dbfs = initial_level_dbfs;

  VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
  helper.gain_applier->Process(info, fake_audio.float_frame_view());

  // Walk the samples and track the largest step between neighbors. The
  // reference for the first sample is the unit input scaled by
  // `kInitialAdaptiveDigitalGainDb`.
  float previous_sample = 1.0f * DbToRatio(kInitialAdaptiveDigitalGainDb);
  float largest_step = 0.0f;
  for (const float sample : fake_audio.float_frame_view().channel(0)) {
    largest_step = std::max(largest_step, std::abs(sample - previous_sample));
    previous_sample = sample;
  }

  const float max_change_per_frame_linear =
      DbToRatio(kMaxGainChangePerFrameDb);
  const float max_change_per_sample =
      max_change_per_frame_linear / kFrameLen10ms48kHz;
  EXPECT_LE(largest_step, max_change_per_sample);
}
// Checks that, with the noise level set to `kWithNoiseDbfs`, the largest
// output sample of a unit-amplitude frame is close to 1.0 once the initial
// frames have been processed.
TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
  GainApplierHelper helper;
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  // Frames skipped before checking; the quotient is truncated to an integer.
  constexpr int num_initial_frames =
      static_cast<int>(kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb);
  constexpr int num_frames = 50;
  constexpr int total_frames = num_initial_frames + num_frames;

  ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
      << "kWithNoiseDbfs is too low";

  // The same frame info is used for every frame.
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.speech_level_dbfs = initial_level_dbfs;
  info.noise_rms_dbfs = kWithNoiseDbfs;

  for (int frame = 0; frame < total_frames; ++frame) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (frame <= num_initial_frames) {
      continue;
    }
    const auto samples = fake_audio.float_frame_view().channel(0);
    const float maximal_ratio =
        *std::max_element(samples.begin(), samples.end());
    EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
  }
}
// Checks that processing a frame with a positive speech level (5 dBFS) does
// not crash. No expectation is placed on the output samples.
TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
  GainApplierHelper helper;
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);

  // Make one call with positive audio level values and settings.
  AdaptiveDigitalGainApplier::FrameInfo positive_level_info = kFrameInfo;
  positive_level_info.speech_level_dbfs = 5.0f;
  VectorFloatFrame stereo_frame(kStereo, kFrameLen10ms48kHz, 10000.0f);
  helper.gain_applier->Process(positive_level_info,
                               stereo_frame.float_frame_view());
}
// Checks that, with a limiter envelope of 1 dBFS and an unreliable speech
// level, the largest output sample of a unit-amplitude frame is close to 1.0
// once the initial frames have been processed.
TEST(GainController2GainApplier, AudioLevelLimitsGain) {
  GainApplierHelper helper;
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  // Frames skipped before checking; the quotient is truncated to an integer.
  constexpr int num_initial_frames =
      static_cast<int>(kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb);
  constexpr int num_frames = 50;
  constexpr int total_frames = num_initial_frames + num_frames;

  ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
      << "kWithNoiseDbfs is too low";

  // The same frame info is used for every frame.
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.speech_level_dbfs = initial_level_dbfs;
  info.limiter_envelope_dbfs = 1.0f;
  info.speech_level_reliable = false;

  for (int frame = 0; frame < total_frames; ++frame) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (frame <= num_initial_frames) {
      continue;
    }
    const auto samples = fake_audio.float_frame_view().channel(0);
    const float maximal_ratio =
        *std::max_element(samples.begin(), samples.end());
    EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
  }
}
// Fixture for tests parameterized on an integer that is used as the adjacent
// speech frames threshold of the gain applier.
class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
protected:
// Returns the test parameter under a descriptive name.
int AdjacentSpeechFramesThreshold() const { return GetParam(); }
};
// Checks that the first output sample stays the same across the first
// `AdjacentSpeechFramesThreshold()` speech frames.
TEST_P(AdaptiveDigitalGainApplierTest,
       DoNotIncreaseGainWithTooFewSpeechFrames) {
  const int num_speech_frames = AdjacentSpeechFramesThreshold();
  GainApplierHelper helper(num_speech_frames);
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);

  float previous_gain = 0.0f;
  for (int frame = 0; frame < num_speech_frames; ++frame) {
    SCOPED_TRACE(frame);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
    const float current_gain = audio.float_frame_view().channel(0)[0];
    // The first frame only provides the reference value.
    if (frame != 0) {
      EXPECT_EQ(previous_gain, current_gain);  // No gain increase.
    }
    previous_gain = current_gain;
  }
}
// Checks that the first output sample gets larger on the speech frame that
// follows the first `AdjacentSpeechFramesThreshold()` speech frames.
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
  const int num_speech_frames = AdjacentSpeechFramesThreshold();
  GainApplierHelper helper(num_speech_frames);
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);

  // Processes one unit-amplitude frame with `kFrameInfo` and returns its
  // first output sample.
  const auto process_speech_frame = [&helper]() -> float {
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
    return audio.float_frame_view().channel(0)[0];
  };

  float gain_before = 0.0f;
  for (int frame = 0; frame < num_speech_frames; ++frame) {
    SCOPED_TRACE(frame);
    gain_before = process_speech_frame();
  }

  // Process one more speech frame.
  const float gain_after = process_speech_frame();
  // The gain has increased.
  EXPECT_GT(gain_after, gain_before);
}
// Runs the `AdaptiveDigitalGainApplierTest` cases with adjacent speech frames
// thresholds of 1, 7 and 31.
INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveDigitalGainApplierTest,
::testing::Values(1, 7, 31));
// Checks that the input is never modified when running in dry run mode.
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
  constexpr float kPcmSamples = 123.456f;
  // Allow enough time to reach the maximum gain.
  constexpr int kNumFramesToAdapt =
      static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;

  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(
      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);

  // Simulate an input signal with low speech level.
  AdaptiveDigitalGainApplier::FrameInfo low_level_info = kFrameInfo;
  low_level_info.speech_level_dbfs = -60.0f;

  // Run the gain applier and check that the PCM samples are not modified.
  for (int frame = 0; frame < kNumFramesToAdapt; ++frame) {
    SCOPED_TRACE(frame);
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
    gain_applier.Process(low_level_info, fake_audio.float_frame_view());
    const float first_sample = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_FLOAT_EQ(first_sample, kPcmSamples);
  }
}
// Checks that no sample is modified before and after the sample rate changes.
TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
  constexpr float kPcmSamples = 123.456f;

  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(
      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);

  AdaptiveDigitalGainApplier::FrameInfo low_level_info = kFrameInfo;
  low_level_info.speech_level_dbfs = -60.0f;

  // First frame: 8 kHz.
  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
  VectorFloatFrame frame_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
  gain_applier.Process(low_level_info, frame_8k.float_frame_view());
  EXPECT_FLOAT_EQ(frame_8k.float_frame_view().channel(0)[0], kPcmSamples);

  // Second frame: 48 kHz, after re-initializing the same gain applier.
  gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono);
  VectorFloatFrame frame_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
  gain_applier.Process(low_level_info, frame_48k.float_frame_view());
  EXPECT_FLOAT_EQ(frame_48k.float_frame_view().channel(0)[0], kPcmSamples);
}
// Checks that no sample is modified before and after the number of channels
// changes.
TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
  constexpr float kPcmSamples = 123.456f;

  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(
      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);

  AdaptiveDigitalGainApplier::FrameInfo low_level_info = kFrameInfo;
  low_level_info.speech_level_dbfs = -60.0f;

  // First frame: mono at 8 kHz.
  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
  VectorFloatFrame mono_frame(kMono, kFrameLen10ms8kHz, kPcmSamples);
  gain_applier.Process(low_level_info, mono_frame.float_frame_view());
  EXPECT_FLOAT_EQ(mono_frame.float_frame_view().channel(0)[0], kPcmSamples);

  // Second frame: stereo at the same sample rate, after re-initializing the
  // same gain applier. The first sample of each channel is checked.
  gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo);
  VectorFloatFrame stereo_frame(kStereo, kFrameLen10ms8kHz, kPcmSamples);
  gain_applier.Process(low_level_info, stereo_frame.float_frame_view());
  for (int channel = 0; channel < kStereo; ++channel) {
    EXPECT_FLOAT_EQ(stereo_frame.float_frame_view().channel(channel)[0],
                    kPcmSamples);
  }
}
} // namespace
} // namespace webrtc