AGC2: use only one headroom parameter
Instead of using two different headroom parameters, namely
`kHeadroomDbfs` and `kSaturationProtectorExtraHeadroomDb`, use only
the former, which now also accounts for the deleted one - i.e., it equals
the sum of the two headrooms. In this way, tuning AGC2 will be easier.
This CL does *not* change the behavior of the AGC2 adaptive digital
controller - bitexactness verified with audioproc_f on a collection of
AEC dumps and Wav files (42 recordings in total).
The unit test changes in agc2/saturation_protector_unittest.cc are
required since `extra_headroom_db` is removed and the changes in
agc2/adaptive_digital_gain_applier_unittest.cc are required because
`AdaptiveDigitalGainApplier` depends on `kHeadroomDbfs`, which has been
updated as stated above.
Bug: webrtc:7494
Change-Id: I0a2a710bbede0caa53938090a004d185fdefaeb9
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/232905
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35109}
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 0df4a26..0e2535a 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@@ -55,7 +55,6 @@
noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
saturation_protector_(
CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
- kSaturationProtectorExtraHeadroomDb,
config.adjacent_speech_frames_threshold,
apm_data_dumper)) {
RTC_DCHECK(apm_data_dumper);
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index 8b58ea0..6fc8ac1 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@@ -20,7 +20,7 @@
class ApmDataDumper;
-// TODO(bugs.webrtc.org): Split into `GainAdaptor` and `GainApplier`.
+// TODO(bugs.webrtc.org/7494): Split into `GainAdaptor` and `GainApplier`.
// Selects the target digital gain, decides when and how quickly to adapt to the
// target and applies the current gain to 10 ms frames.
class AdaptiveDigitalGainApplier {
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
index f4a23a9..11df18e 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@@ -31,7 +31,7 @@
// Constants used in place of estimated noise levels.
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
-constexpr float kWithNoiseDbfs = -20.f;
+constexpr float kWithNoiseDbfs = -20.0f;
constexpr float kMaxGainChangePerSecondDb = 3.0f;
constexpr float kMaxGainChangePerFrameDb =
@@ -54,10 +54,10 @@
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
};
-// Voice on, no noise, low limiter, confident level.
-static_assert(std::is_trivially_destructible<
- AdaptiveDigitalGainApplier::FrameInfo>::value,
- "");
+// Sample frame information for the tests mocking noiseless speech detected
+// with maximum probability and with level, headroom and limiter envelope chosen
+// so that the resulting gain equals `kInitialAdaptiveDigitalGainDb` - i.e., no
+// gain adaptation is expected.
constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
/*speech_probability=*/kMaxSpeechProbability,
/*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
@@ -241,14 +241,18 @@
GainApplierHelper helper(adjacent_speech_frames_threshold);
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
+ // Lower the speech level so that the target gain will be increased.
+ AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+ info.speech_level_dbfs -= 12.0f;
+
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
- helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
+ helper.gain_applier->Process(info, audio.float_frame_view());
const float gain = audio.float_frame_view().channel(0)[0];
if (i > 0) {
- EXPECT_EQ(prev_gain, gain); // No gain increase.
+ EXPECT_EQ(prev_gain, gain); // No gain increase applied.
}
prev_gain = gain;
}
@@ -259,25 +263,30 @@
GainApplierHelper helper(adjacent_speech_frames_threshold);
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
+ // Lower the speech level so that the target gain will be increased.
+ AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+ info.speech_level_dbfs -= 12.0f;
+
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
- helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
+ helper.gain_applier->Process(info, audio.float_frame_view());
prev_gain = audio.float_frame_view().channel(0)[0];
}
// Process one more speech frame.
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
- helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
+ helper.gain_applier->Process(info, audio.float_frame_view());
- // The gain has increased.
+ // An increased gain has been applied.
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
}
INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveDigitalGainApplierTest,
- ::testing::Values(1, 7, 31));
+ ::testing::Values(1000));
+// ::testing::Values(1, 7, 31));
// Checks that the input is never modified when running in dry run mode.
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index adb1614..da28d8d 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -25,11 +25,11 @@
constexpr int kMaximalNumberOfSamplesPerChannel = 480;
// Adaptive digital gain applier settings below.
-constexpr float kHeadroomDbfs = 1.0f;
+constexpr float kHeadroomDbfs = 6.0f;
constexpr float kMaxGainDb = 30.0f;
constexpr float kInitialAdaptiveDigitalGainDb = 8.0f;
// At what limiter levels should we start decreasing the adaptive digital gain.
-constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
+constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
// This is the threshold for speech. Speech frames are used for updating the
// speech level, measuring the amount of speech, and decide when to allow target
@@ -48,14 +48,12 @@
// Saturation Protector settings.
constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f;
-constexpr float kSaturationProtectorExtraHeadroomDb = 5.0f;
constexpr int kSaturationProtectorBufferSize = 4;
// Set the initial speech level estimate so that `kInitialAdaptiveDigitalGainDb`
// is applied at the beginning of the call.
constexpr float kInitialSpeechLevelEstimateDbfs =
- -kSaturationProtectorExtraHeadroomDb -
- kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
+ -kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
kHeadroomDbfs;
// Number of interpolation points for each region of the limiter.
diff --git a/modules/audio_processing/agc2/saturation_protector.cc b/modules/audio_processing/agc2/saturation_protector.cc
index d6f21ef..961baf4 100644
--- a/modules/audio_processing/agc2/saturation_protector.cc
+++ b/modules/audio_processing/agc2/saturation_protector.cc
@@ -95,12 +95,10 @@
class SaturationProtectorImpl : public SaturationProtector {
public:
explicit SaturationProtectorImpl(float initial_headroom_db,
- float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper)
: apm_data_dumper_(apm_data_dumper),
initial_headroom_db_(initial_headroom_db),
- extra_headroom_db_(extra_headroom_db),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
Reset();
}
@@ -140,7 +138,7 @@
if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
// `preliminary_state_` is now reliable. Update the headroom.
- headroom_db_ = preliminary_state_.headroom_db + extra_headroom_db_;
+ headroom_db_ = preliminary_state_.headroom_db;
}
}
DumpDebugData();
@@ -148,7 +146,7 @@
void Reset() override {
num_adjacent_speech_frames_ = 0;
- headroom_db_ = initial_headroom_db_ + extra_headroom_db_;
+ headroom_db_ = initial_headroom_db_;
ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
}
@@ -165,7 +163,6 @@
ApmDataDumper* const apm_data_dumper_;
const float initial_headroom_db_;
- const float extra_headroom_db_;
const int adjacent_speech_frames_threshold_;
int num_adjacent_speech_frames_;
float headroom_db_;
@@ -177,12 +174,10 @@
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
float initial_headroom_db,
- float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper) {
return std::make_unique<SaturationProtectorImpl>(
- initial_headroom_db, extra_headroom_db, adjacent_speech_frames_threshold,
- apm_data_dumper);
+ initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
}
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/saturation_protector.h b/modules/audio_processing/agc2/saturation_protector.h
index 0c384f1..ef22145 100644
--- a/modules/audio_processing/agc2/saturation_protector.h
+++ b/modules/audio_processing/agc2/saturation_protector.h
@@ -38,7 +38,6 @@
// Creates a saturation protector that starts at `initial_headroom_db`.
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
float initial_headroom_db,
- float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper);
diff --git a/modules/audio_processing/agc2/saturation_protector_unittest.cc b/modules/audio_processing/agc2/saturation_protector_unittest.cc
index dc16dc2..3b104be 100644
--- a/modules/audio_processing/agc2/saturation_protector_unittest.cc
+++ b/modules/audio_processing/agc2/saturation_protector_unittest.cc
@@ -18,7 +18,6 @@
namespace {
constexpr float kInitialHeadroomDb = 20.0f;
-constexpr float kNoExtraHeadroomDb = 0.0f;
constexpr int kNoAdjacentSpeechFramesRequired = 1;
constexpr float kMaxSpeechProbability = 1.0f;
@@ -47,8 +46,7 @@
TEST(GainController2SaturationProtector, Reset) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
- kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
- &apm_data_dumper);
+ kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability,
/*peak_dbfs=*/0.0f,
@@ -71,43 +69,13 @@
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
- kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
- &apm_data_dumper);
+ kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs, *saturation_protector);
EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb,
kMaxDifferenceDb);
}
-// Checks that the extra headroom is applied.
-TEST(GainController2SaturationProtector, ExtraHeadroomApplied) {
- constexpr float kExtraHeadroomDb = 5.1234f;
- constexpr int kNumIterations = 10;
- constexpr float kPeakLevelDbfs = -20.0f;
- constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - 15.0f;
-
- ApmDataDumper apm_data_dumper(0);
-
- auto saturation_protector_no_extra = CreateSaturationProtector(
- kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
- &apm_data_dumper);
- for (int i = 0; i < kNumIterations; ++i) {
- saturation_protector_no_extra->Analyze(kMaxSpeechProbability,
- kPeakLevelDbfs, kSpeechLevelDbfs);
- }
-
- auto saturation_protector_extra = CreateSaturationProtector(
- kInitialHeadroomDb, kExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
- &apm_data_dumper);
- for (int i = 0; i < kNumIterations; ++i) {
- saturation_protector_extra->Analyze(kMaxSpeechProbability, kPeakLevelDbfs,
- kSpeechLevelDbfs);
- }
-
- EXPECT_EQ(saturation_protector_no_extra->HeadroomDb() + kExtraHeadroomDb,
- saturation_protector_extra->HeadroomDb());
-}
-
// Checks that the headroom does not change too quickly.
TEST(GainController2SaturationProtector, ChangeSlowly) {
constexpr int kNumIterations = 1000;
@@ -119,8 +87,7 @@
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
- kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
- &apm_data_dumper);
+ kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
float max_difference_db =
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs, *saturation_protector);
@@ -142,8 +109,7 @@
TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
- kInitialHeadroomDb, kNoExtraHeadroomDb,
- adjacent_speech_frames_threshold(), &apm_data_dumper);
+ kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1,
kMaxSpeechProbability,
@@ -156,8 +122,7 @@
TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
- kInitialHeadroomDb, kNoExtraHeadroomDb,
- adjacent_speech_frames_threshold(), &apm_data_dumper);
+ kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1,
kMaxSpeechProbability,