AGC2: max output noise level now part of config
Tested: bit-exactness verified with audioproc_f
Bug: webrtc:7494
Change-Id: Ic42f09dc13560494963cdcd338a0c52a729e108d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186266
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32282}
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 380c39c..0372ccf 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@@ -30,6 +30,7 @@
constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
constexpr float kMaxGainChangePerSecondDb = 3.f;
+constexpr float kMaxOutputNoiseLevelDbfs = -50.f;
} // namespace
@@ -37,7 +38,8 @@
: speech_level_estimator_(apm_data_dumper),
gain_applier_(apm_data_dumper,
kGainApplierAdjacentSpeechFramesThreshold,
- kMaxGainChangePerSecondDb),
+ kMaxGainChangePerSecondDb,
+ kMaxOutputNoiseLevelDbfs),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper);
@@ -56,7 +58,8 @@
gain_applier_(
apm_data_dumper,
config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
- config.adaptive_digital.max_gain_change_db_per_second),
+ config.adaptive_digital.max_gain_change_db_per_second,
+ config.adaptive_digital.max_output_noise_level_dbfs),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper);
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
index ef048e6..e7999c0 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@@ -44,12 +44,16 @@
return 0.f;
}
-// We require 'gain + noise_level <= kMaxNoiseLevelDbfs'.
+// Returns `target_gain` capped so that the output noise level does not exceed
+// `max_output_noise_level_dbfs`. The cap is clamped at 0 dB, so this limit
+// never forces attenuation when the input noise is already above the maximum.
float LimitGainByNoise(float target_gain,
float input_noise_level_dbfs,
- ApmDataDumper* apm_data_dumper) {
- const float noise_headroom_db = kMaxNoiseLevelDbfs - input_noise_level_dbfs;
- apm_data_dumper->DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
+ float max_output_noise_level_dbfs,
+ ApmDataDumper& apm_data_dumper) {
+ const float noise_headroom_db =
+ max_output_noise_level_dbfs - input_noise_level_dbfs;
+ apm_data_dumper.DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
return std::min(target_gain, std::max(noise_headroom_db, 0.f));
}
@@ -89,7 +93,8 @@
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold,
- float max_gain_change_db_per_second)
+ float max_gain_change_db_per_second,
+ float max_output_noise_level_dbfs)
: apm_data_dumper_(apm_data_dumper),
gain_applier_(
/*hard_clip_samples=*/false,
@@ -97,11 +102,14 @@
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
kFrameDurationMs / 1000.f),
+ max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
+ RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f);
+ RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f);
}
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
@@ -126,7 +134,8 @@
const float target_gain_db = LimitGainByLowConfidence(
LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
- info.input_noise_level_dbfs, apm_data_dumper_),
+ info.input_noise_level_dbfs,
+ max_output_noise_level_dbfs_, *apm_data_dumper_),
last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);
// Forbid increasing the gain until enough adjacent speech frames are
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index ca36abc..a65379f 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@@ -34,12 +34,15 @@
bool estimate_is_confident;
};
+ // Ctor.
// `adjacent_speech_frames_threshold` indicates how many speech frames are
// required before a gain increase is allowed. `max_gain_change_db_per_second`
// limits the adaptation speed (uniformly operated across frames).
+ // `max_output_noise_level_dbfs` limits the output noise level.
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold,
- float max_gain_change_db_per_second);
+ float max_gain_change_db_per_second,
+ float max_output_noise_level_dbfs);
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
delete;
@@ -54,6 +57,7 @@
const int adjacent_speech_frames_threshold_;
const float max_gain_change_db_per_10ms_;
+ const float max_output_noise_level_dbfs_;
int calls_since_last_gain_log_;
int frames_to_gain_increase_allowed_;
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
index 63763c8..e2df700 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@@ -36,14 +36,18 @@
constexpr float kMaxGainChangePerSecondDb = 3.f;
constexpr float kMaxGainChangePerFrameDb =
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
+constexpr float kMaxOutputNoiseLevelDbfs = -50.f;
// Helper to instance `AdaptiveDigitalGainApplier`.
struct GainApplierHelper {
GainApplierHelper()
+ : GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}
+ explicit GainApplierHelper(int adjacent_speech_frames_threshold)
: apm_data_dumper(0),
gain_applier(&apm_data_dumper,
- /*adjacent_speech_frames_threshold=*/1,
- kMaxGainChangePerSecondDb) {}
+ adjacent_speech_frames_threshold,
+ kMaxGainChangePerSecondDb,
+ kMaxOutputNoiseLevelDbfs) {}
ApmDataDumper apm_data_dumper;
AdaptiveDigitalGainApplier gain_applier;
};
@@ -185,7 +189,8 @@
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
constexpr int num_frames = 50;
- ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
+ ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
+ << "kWithNoiseDbfs is too low";
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
@@ -223,7 +228,8 @@
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
constexpr int num_frames = 50;
- ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
+ ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
+ << "kWithNoiseDbfs is too low";
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
@@ -252,10 +258,8 @@
TEST_P(AdaptiveDigitalGainApplierTest,
DoNotIncreaseGainWithTooFewSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
- adjacent_speech_frames_threshold,
- kMaxGainChangePerFrameDb);
+ GainApplierHelper helper(adjacent_speech_frames_threshold);
+
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;
@@ -263,7 +267,7 @@
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
- gain_applier.Process(info, audio.float_frame_view());
+ helper.gain_applier.Process(info, audio.float_frame_view());
const float gain = audio.float_frame_view().channel(0)[0];
if (i > 0) {
EXPECT_EQ(prev_gain, gain); // No gain increase.
@@ -274,23 +278,21 @@
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
- adjacent_speech_frames_threshold,
- kMaxGainChangePerFrameDb);
+ GainApplierHelper helper(adjacent_speech_frames_threshold);
+
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;
float prev_gain = 0.f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
- gain_applier.Process(info, audio.float_frame_view());
+ helper.gain_applier.Process(info, audio.float_frame_view());
prev_gain = audio.float_frame_view().channel(0)[0];
}
// Process one more speech frame.
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
- gain_applier.Process(info, audio.float_frame_view());
+ helper.gain_applier.Process(info, audio.float_frame_view());
// The gain has increased.
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 5149ed6..5d01100 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -32,9 +32,6 @@
// At what limiter levels should we start decreasing the adaptive digital gain.
constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
-// This parameter must be tuned together with the noise estimator.
-constexpr float kMaxNoiseLevelDbfs = -50.f;
-
// This is the threshold for speech. Speech frames are used for updating the
// speech level, measuring the amount of speech, and decide when to allow target
// gain reduction.
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index a56b315..d09e2ba 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -354,6 +354,7 @@
float extra_saturation_margin_db = 2.f;
int gain_applier_adjacent_speech_frames_threshold = 1;
float max_gain_change_db_per_second = 3.f;
+ float max_output_noise_level_dbfs = -50.f;
} adaptive_digital;
} gain_controller2;