AGC2 config: allow tuning of headroom, max gain and initial gain

This CL does *not* change the behavior of the AGC2 adaptive digital
controller - bitexactness verified with audioproc_f on a collection of
AEC dumps and Wav files (42 recordings in total).

Tested: compiled Chrome with this patch and made an appr.tc test call

Bug: webrtc:7494
Change-Id: Ia8a9f6fbc3a3459b888a2eed87e108f0d39cfe99
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/233520
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35140}

commit: a850e6c8b6776f17e2c85124206a7310060ceb2e [log] [tgz]
author: Alessio Bazzica <alessiob@webrtc.org> Mon Oct 04 11:35:55 2021
committer: WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com> Mon Oct 04 16:11:00 2021
tree: 4ec47829c93139bc151d894c1f4a1a0b30be423f
parent: 41b4397e1a2c770b261126bf8664c542bf3b3f07 [diff]
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index 6dd8bab..ce70c5d 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn

@@ -178,6 +178,7 @@
     ":common",
     ":gain_applier",
     ":test_utils",
+    "..:api",
     "..:apm_logging",
     "..:audio_frame_view",
     "../../../api:array_view",

diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 0e2535a..eafbcc2 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc

@@ -43,14 +43,9 @@
 
 AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
                          const AdaptiveDigitalConfig& config)
-    : speech_level_estimator_(apm_data_dumper,
-                              config.adjacent_speech_frames_threshold),
+    : speech_level_estimator_(apm_data_dumper, config),
       vad_(config.vad_reset_period_ms, GetAllowedCpuFeatures(config)),
-      gain_controller_(apm_data_dumper,
-                       config.adjacent_speech_frames_threshold,
-                       config.max_gain_change_db_per_second,
-                       config.max_output_noise_level_dbfs,
-                       config.dry_run),
+      gain_controller_(apm_data_dumper, config),
       apm_data_dumper_(apm_data_dumper),
       noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
       saturation_protector_(

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
index e59b110..526ef06e 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc

@@ -23,31 +23,38 @@
 namespace webrtc {
 namespace {
 
+using AdaptiveDigitalConfig =
+    AudioProcessing::Config::GainController2::AdaptiveDigital;
+
 constexpr int kHeadroomHistogramMin = 0;
 constexpr int kHeadroomHistogramMax = 50;
+constexpr int kGainDbHistogramMax = 30;
 
-// This function maps input level to desired applied gain. We want to
-// boost the signal so that peaks are at -kHeadroomDbfs. We can't
-// apply more than kMaxGainDb gain.
-float ComputeGainDb(float input_level_dbfs) {
-  // If the level is very low, boost it as much as we can.
-  if (input_level_dbfs < -(kHeadroomDbfs + kMaxGainDb)) {
-    return kMaxGainDb;
+// Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
+// Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
+// safety margin to allow transient peaks to exceed the target peak level
+// without clipping.
+float ComputeGainDb(float input_level_dbfs,
+                    const AdaptiveDigitalConfig& config) {
+  // If the level is very low, apply the maximum gain.
+  if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
+    return config.max_gain_db;
   }
   // We expect to end up here most of the time: the level is below
   // -headroom, but we can boost it to -headroom.
-  if (input_level_dbfs < -kHeadroomDbfs) {
-    return -kHeadroomDbfs - input_level_dbfs;
+  if (input_level_dbfs < -config.headroom_db) {
+    return -config.headroom_db - input_level_dbfs;
   }
-  // Otherwise, the level is too high and we can't boost.
-  RTC_DCHECK_GE(input_level_dbfs, -kHeadroomDbfs);
-  return 0.f;
+  // The level is too high and we can't boost.
+  RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
+  return 0.0f;
 }
 
-// Returns `target_gain` if the output noise level is below
-// `max_output_noise_level_dbfs`; otherwise returns a capped gain so that the
-// output noise level equals `max_output_noise_level_dbfs`.
-float LimitGainByNoise(float target_gain,
+// Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
+// does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
+// `target_gain_db` so that the output noise level equals
+// `max_output_noise_level_dbfs`.
+float LimitGainByNoise(float target_gain_db,
                        float input_noise_level_dbfs,
                        float max_output_noise_level_dbfs,
                        ApmDataDumper& apm_data_dumper) {
@@ -55,24 +62,25 @@
       max_output_noise_level_dbfs - input_noise_level_dbfs;
   apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
                           max_allowed_gain_db);
-  return std::min(target_gain, std::max(max_allowed_gain_db, 0.f));
+  return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
 }
 
-float LimitGainByLowConfidence(float target_gain,
-                               float last_gain,
+float LimitGainByLowConfidence(float target_gain_db,
+                               float last_gain_db,
                                float limiter_audio_level_dbfs,
                                bool estimate_is_confident) {
   if (estimate_is_confident ||
       limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
-    return target_gain;
+    return target_gain_db;
   }
-  const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain;
+  const float limiter_level_dbfs_before_gain =
+      limiter_audio_level_dbfs - last_gain_db;
 
-  // Compute a new gain so that `limiter_level_before_gain` + `new_target_gain`
-  // is not great than `kLimiterThresholdForAgcGainDbfs`.
-  const float new_target_gain = std::max(
-      kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f);
-  return std::min(new_target_gain, target_gain);
+  // Compute a new gain so that `limiter_level_dbfs_before_gain` +
+  // `new_target_gain_db` is not greater than `kLimiterThresholdForAgcGainDbfs`.
+  const float new_target_gain_db = std::max(
+      kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
+  return std::min(new_target_gain_db, target_gain_db);
 }
 
 // Computes how the gain should change during this frame.
@@ -86,7 +94,7 @@
   RTC_DCHECK_GT(max_gain_increase_db, 0);
   float target_gain_difference_db = target_gain_db - last_gain_db;
   if (!gain_increase_allowed) {
-    target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
+    target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
   }
   return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
                         max_gain_increase_db);
@@ -110,32 +118,28 @@
 
 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
     ApmDataDumper* apm_data_dumper,
-    int adjacent_speech_frames_threshold,
-    float max_gain_change_db_per_second,
-    float max_output_noise_level_dbfs,
-    bool dry_run)
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
     : apm_data_dumper_(apm_data_dumper),
       gain_applier_(
           /*hard_clip_samples=*/false,
-          /*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
-      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
-      max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
-                                   kFrameDurationMs / 1000.f),
-      max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
-      dry_run_(dry_run),
+          /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
+      config_(config),
+      max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
+                                   kFrameDurationMs / 1000.0f),
       calls_since_last_gain_log_(0),
-      frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
-      last_gain_db_(kInitialAdaptiveDigitalGainDb) {
-  RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f);
+      frames_to_gain_increase_allowed_(
+          config_.adjacent_speech_frames_threshold),
+      last_gain_db_(config_.initial_gain_db) {
+  RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
   RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
-  RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f);
-  RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f);
+  RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
+  RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
   Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1);
 }
 
 void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
                                             int num_channels) {
-  if (!dry_run_) {
+  if (!config_.dry_run) {
     return;
   }
   RTC_DCHECK_GT(sample_rate_hz, 0);
@@ -159,7 +163,7 @@
 
 void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
                                          AudioFrameView<float> frame) {
-  RTC_DCHECK_GE(info.speech_level_dbfs, -150.f);
+  RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
   RTC_DCHECK_GE(frame.num_channels(), 1);
   RTC_DCHECK(
       frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
@@ -172,15 +176,16 @@
   const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
 
   const float target_gain_db = LimitGainByLowConfidence(
-      LimitGainByNoise(ComputeGainDb(input_level_dbfs), info.noise_rms_dbfs,
-                       max_output_noise_level_dbfs_, *apm_data_dumper_),
+      LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
+                       info.noise_rms_dbfs, config_.max_output_noise_level_dbfs,
+                       *apm_data_dumper_),
       last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
 
   // Forbid increasing the gain until enough adjacent speech frames are
   // observed.
   bool first_confident_speech_frame = false;
   if (info.speech_probability < kVadConfidenceThreshold) {
-    frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
+    frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold;
   } else if (frames_to_gain_increase_allowed_ > 0) {
     frames_to_gain_increase_allowed_--;
     first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
@@ -196,7 +201,7 @@
     // No gain increase happened while waiting for a long enough speech
     // sequence. Therefore, temporarily allow a faster gain increase.
     RTC_DCHECK(gain_increase_allowed);
-    max_gain_increase_db *= adjacent_speech_frames_threshold_;
+    max_gain_increase_db *= config_.adjacent_speech_frames_threshold;
   }
 
   const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
@@ -217,7 +222,7 @@
   }
 
   // Modify `frame` only if not running in "dry run" mode.
-  if (!dry_run_) {
+  if (!config_.dry_run) {
     gain_applier_.ApplyGain(frame);
   } else {
     // Copy `frame` so that `ApplyGain()` is called (on a copy).
@@ -247,7 +252,8 @@
         kHeadroomHistogramMax,
         kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
     RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
-                                last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
+                                last_gain_db_, 0, kGainDbHistogramMax,
+                                kGainDbHistogramMax + 1);
     RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
                      << " | speech_dbfs: " << info.speech_level_dbfs
                      << " | noise_dbfs: " << info.noise_rms_dbfs

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index 6fc8ac1..e254b51 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h

@@ -15,6 +15,7 @@
 
 #include "modules/audio_processing/agc2/gain_applier.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
 
 namespace webrtc {
 
@@ -35,16 +36,9 @@
     float limiter_envelope_dbfs;  // Envelope level from the limiter (dBFS).
   };
 
-  // Ctor. `adjacent_speech_frames_threshold` indicates how many adjacent speech
-  // frames must be observed in order to consider the sequence as speech.
-  // `max_gain_change_db_per_second` limits the adaptation speed (uniformly
-  // operated across frames). `max_output_noise_level_dbfs` limits the output
-  // noise level. If `dry_run` is true, `Process()` will not modify the audio.
-  AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
-                             int adjacent_speech_frames_threshold,
-                             float max_gain_change_db_per_second,
-                             float max_output_noise_level_dbfs,
-                             bool dry_run);
+  AdaptiveDigitalGainApplier(
+      ApmDataDumper* apm_data_dumper,
+      const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
   AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
   AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
       delete;
@@ -59,10 +53,8 @@
   ApmDataDumper* const apm_data_dumper_;
   GainApplier gain_applier_;
 
-  const int adjacent_speech_frames_threshold_;
+  const AudioProcessing::Config::GainController2::AdaptiveDigital config_;
   const float max_gain_change_db_per_10ms_;
-  const float max_output_noise_level_dbfs_;
-  const bool dry_run_;
 
   int calls_since_last_gain_log_;
   int frames_to_gain_increase_allowed_;

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
index 3c5642b..efbc1e1 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc

@@ -16,6 +16,7 @@
 #include "common_audio/include/audio_util.h"
 #include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "modules/audio_processing/include/audio_processing.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/gunit.h"
 
@@ -33,57 +34,68 @@
 constexpr float kNoNoiseDbfs = kMinLevelDbfs;
 constexpr float kWithNoiseDbfs = -20.0f;
 
-constexpr float kMaxGainChangePerSecondDb = 3.0f;
-constexpr float kMaxGainChangePerFrameDb =
-    kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.0f;
-constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
+// Number of additional frames to process in the tests to ensure that the tested
+// adaptation processes have converged.
+constexpr int kNumExtraFrames = 10;
+
+constexpr float GetMaxGainChangePerFrameDb(
+    float max_gain_change_db_per_second) {
+  return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f;
+}
+
+using AdaptiveDigitalConfig =
+    AudioProcessing::Config::GainController2::AdaptiveDigital;
+
+constexpr AdaptiveDigitalConfig kDefaultConfig{};
 
 // Helper to create initialized `AdaptiveDigitalGainApplier` objects.
 struct GainApplierHelper {
-  GainApplierHelper()
-      : GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}
-  explicit GainApplierHelper(int adjacent_speech_frames_threshold)
+  explicit GainApplierHelper(const AdaptiveDigitalConfig& config)
       : apm_data_dumper(0),
-        gain_applier(std::make_unique<AdaptiveDigitalGainApplier>(
-            &apm_data_dumper,
-            adjacent_speech_frames_threshold,
-            kMaxGainChangePerSecondDb,
-            kMaxOutputNoiseLevelDbfs,
-            /*dry_run=*/false)) {}
+        gain_applier(
+            std::make_unique<AdaptiveDigitalGainApplier>(&apm_data_dumper,
+                                                         config)) {}
   ApmDataDumper apm_data_dumper;
   std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
 };
 
-// Sample frame information for the tests mocking noiseless speech detected
-// with maximum probability and with level, headroom and limiter envelope chosen
-// so that the resulting gain equals `kInitialAdaptiveDigitalGainDb` - i.e., no
-// gain adaptation is expected.
-constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
-    /*speech_probability=*/kMaxSpeechProbability,
-    /*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
-    /*speech_level_reliable=*/true,
-    /*noise_rms_dbfs=*/kNoNoiseDbfs,
-    /*headroom_db=*/kSaturationProtectorInitialHeadroomDb,
-    /*limiter_envelope_dbfs=*/-2.0f};
+// Returns a `FrameInfo` sample to simulate noiseless speech detected with
+// maximum probability and with level, headroom and limiter envelope chosen
+// so that the resulting gain equals the default initial adaptive digital gain,
+// i.e., no gain adaptation is expected.
+AdaptiveDigitalGainApplier::FrameInfo GetFrameInfoToNotAdapt(
+    const AdaptiveDigitalConfig& config) {
+  AdaptiveDigitalGainApplier::FrameInfo info;
+  info.speech_probability = kMaxSpeechProbability;
+  info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db;
+  info.speech_level_reliable = true;
+  info.noise_rms_dbfs = kNoNoiseDbfs;
+  info.headroom_db = config.headroom_db;
+  info.limiter_envelope_dbfs = -2.0f;
+  return info;
+}
 
 TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
-  GainApplierHelper helper;
+  GainApplierHelper helper(kDefaultConfig);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
   // Make one call with reasonable audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
-  info.speech_level_dbfs = -5.0f;
-  helper.gain_applier->Process(kFrameInfo, fake_audio.float_frame_view());
+  helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
+                               fake_audio.float_frame_view());
 }
 
 // Checks that the maximum allowed gain is applied.
 TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
   constexpr int kNumFramesToAdapt =
-      static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
+      static_cast<int>(kDefaultConfig.max_gain_db /
+                       GetMaxGainChangePerFrameDb(
+                           kDefaultConfig.max_gain_change_db_per_second)) +
+      kNumExtraFrames;
 
-  GainApplierHelper helper;
+  GainApplierHelper helper(kDefaultConfig);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalGainApplier::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
   info.speech_level_dbfs = -60.0f;
   float applied_gain;
   for (int i = 0; i < kNumFramesToAdapt; ++i) {
@@ -92,30 +104,33 @@
     applied_gain = fake_audio.float_frame_view().channel(0)[0];
   }
   const float applied_gain_db = 20.0f * std::log10f(applied_gain);
-  EXPECT_NEAR(applied_gain_db, kMaxGainDb, 0.1f);
+  EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f);
 }
 
 TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
-  GainApplierHelper helper;
+  GainApplierHelper helper(kDefaultConfig);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
-  // A few extra frames for safety.
+  constexpr float kMaxGainChangeDbPerFrame =
+      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
   constexpr int kNumFramesToAdapt =
-      static_cast<int>(initial_level_dbfs / kMaxGainChangePerFrameDb) + 10;
+      static_cast<int>(initial_level_dbfs / kMaxGainChangeDbPerFrame) +
+      kNumExtraFrames;
 
-  const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);
+  const float max_change_per_frame_linear = DbToRatio(kMaxGainChangeDbPerFrame);
 
   float last_gain_linear = 1.f;
   for (int i = 0; i < kNumFramesToAdapt; ++i) {
     SCOPED_TRACE(i);
     VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
-    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    AdaptiveDigitalGainApplier::FrameInfo info =
+        GetFrameInfoToNotAdapt(kDefaultConfig);
     info.speech_level_dbfs = initial_level_dbfs;
     helper.gain_applier->Process(info, fake_audio.float_frame_view());
     float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
     EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
-              kMaxChangePerFrameLinear);
+              max_change_per_frame_linear);
     last_gain_linear = current_gain_linear;
   }
 
@@ -123,56 +138,61 @@
   for (int i = 0; i < kNumFramesToAdapt; ++i) {
     SCOPED_TRACE(i);
     VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
-    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    AdaptiveDigitalGainApplier::FrameInfo info =
+        GetFrameInfoToNotAdapt(kDefaultConfig);
     info.speech_level_dbfs = 0.f;
     helper.gain_applier->Process(info, fake_audio.float_frame_view());
     float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
     EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
-              kMaxChangePerFrameLinear);
+              max_change_per_frame_linear);
     last_gain_linear = current_gain_linear;
   }
 }
 
 TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
-  GainApplierHelper helper;
+  GainApplierHelper helper(kDefaultConfig);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
 
   VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalGainApplier::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
   info.speech_level_dbfs = initial_level_dbfs;
   helper.gain_applier->Process(info, fake_audio.float_frame_view());
   float maximal_difference = 0.0f;
-  float current_value = 1.0f * DbToRatio(kInitialAdaptiveDigitalGainDb);
+  float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db);
   for (const auto& x : fake_audio.float_frame_view().channel(0)) {
     const float difference = std::abs(x - current_value);
     maximal_difference = std::max(maximal_difference, difference);
     current_value = x;
   }
 
-  const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);
-  const float kMaxChangePerSample =
-      kMaxChangePerFrameLinear / kFrameLen10ms48kHz;
+  const float max_change_per_frame_linear = DbToRatio(
+      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second));
+  const float max_change_per_sample =
+      max_change_per_frame_linear / kFrameLen10ms48kHz;
 
-  EXPECT_LE(maximal_difference, kMaxChangePerSample);
+  EXPECT_LE(maximal_difference, max_change_per_sample);
 }
 
 TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
-  GainApplierHelper helper;
+  GainApplierHelper helper(kDefaultConfig);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   constexpr int num_initial_frames =
-      kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
+      kDefaultConfig.initial_gain_db /
+      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
   constexpr int num_frames = 50;
 
-  ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
+  ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
       << "kWithNoiseDbfs is too low";
 
   for (int i = 0; i < num_initial_frames + num_frames; ++i) {
     VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
-    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    AdaptiveDigitalGainApplier::FrameInfo info =
+        GetFrameInfoToNotAdapt(kDefaultConfig);
     info.speech_level_dbfs = initial_level_dbfs;
     info.noise_rms_dbfs = kWithNoiseDbfs;
     helper.gain_applier->Process(info, fake_audio.float_frame_view());
@@ -189,31 +209,34 @@
 }
 
 TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
-  GainApplierHelper helper;
+  GainApplierHelper helper(kDefaultConfig);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
 
   // Make one call with positive audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalGainApplier::FrameInfo info =
+      GetFrameInfoToNotAdapt(kDefaultConfig);
   info.speech_level_dbfs = 5.0f;
   helper.gain_applier->Process(info, fake_audio.float_frame_view());
 }
 
 TEST(GainController2GainApplier, AudioLevelLimitsGain) {
-  GainApplierHelper helper;
+  GainApplierHelper helper(kDefaultConfig);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   constexpr float initial_level_dbfs = -25.0f;
   constexpr int num_initial_frames =
-      kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
+      kDefaultConfig.initial_gain_db /
+      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
   constexpr int num_frames = 50;
 
-  ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
+  ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
       << "kWithNoiseDbfs is too low";
 
   for (int i = 0; i < num_initial_frames + num_frames; ++i) {
     VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
-    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    AdaptiveDigitalGainApplier::FrameInfo info =
+        GetFrameInfoToNotAdapt(kDefaultConfig);
     info.speech_level_dbfs = initial_level_dbfs;
     info.limiter_envelope_dbfs = 1.0f;
     info.speech_level_reliable = false;
@@ -232,21 +255,22 @@
 
 class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
  protected:
-  int AdjacentSpeechFramesThreshold() const { return GetParam(); }
+  int adjacent_speech_frames_threshold() const { return GetParam(); }
 };
 
 TEST_P(AdaptiveDigitalGainApplierTest,
        DoNotIncreaseGainWithTooFewSpeechFrames) {
-  const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
-  GainApplierHelper helper(adjacent_speech_frames_threshold);
+  AdaptiveDigitalConfig config;
+  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
+  GainApplierHelper helper(config);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   // Lower the speech level so that the target gain will be increased.
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
   info.speech_level_dbfs -= 12.0f;
 
   float prev_gain = 0.0f;
-  for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
+  for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
     SCOPED_TRACE(i);
     VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
     helper.gain_applier->Process(info, audio.float_frame_view());
@@ -259,16 +283,17 @@
 }
 
 TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
-  const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
-  GainApplierHelper helper(adjacent_speech_frames_threshold);
+  AdaptiveDigitalConfig config;
+  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
+  GainApplierHelper helper(config);
   helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
 
   // Lower the speech level so that the target gain will be increased.
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
   info.speech_level_dbfs -= 12.0f;
 
   float prev_gain = 0.0f;
-  for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
+  for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
     SCOPED_TRACE(i);
     VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
     helper.gain_applier->Process(info, audio.float_frame_view());
@@ -289,63 +314,65 @@
 
 // Checks that the input is never modified when running in dry run mode.
 TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(
-      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
-      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
+  AdaptiveDigitalConfig config;
+  config.dry_run = true;
+  GainApplierHelper helper(config);
+
   // Simulate an input signal with log speech level.
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
   info.speech_level_dbfs = -60.0f;
-  // Allow enough time to reach the maximum gain.
-  constexpr int kNumFramesToAdapt =
-      static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
+  const int num_frames_to_adapt =
+      static_cast<int>(
+          config.max_gain_db /
+          GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
+      kNumExtraFrames;
   constexpr float kPcmSamples = 123.456f;
   // Run the gain applier and check that the PCM samples are not modified.
-  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
-  for (int i = 0; i < kNumFramesToAdapt; ++i) {
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
+  for (int i = 0; i < num_frames_to_adapt; ++i) {
     SCOPED_TRACE(i);
     VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier->Process(info, fake_audio.float_frame_view());
     EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
   }
 }
 
 // Checks that no sample is modified before and after the sample rate changes.
 TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(
-      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
-      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalConfig config;
+  config.dry_run = true;
+  GainApplierHelper helper(config);
+
+  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
   info.speech_level_dbfs = -60.0f;
   constexpr float kPcmSamples = 123.456f;
   VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
-  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
-  gain_applier.Process(info, fake_audio_8k.float_frame_view());
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
+  helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
   EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
-  gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono);
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
   VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
-  gain_applier.Process(info, fake_audio_48k.float_frame_view());
+  helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
   EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
 }
 
 // Checks that no sample is modified before and after the number of channels
 // changes.
 TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(
-      &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
-      kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
-  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  AdaptiveDigitalConfig config;
+  config.dry_run = true;
+  GainApplierHelper helper(config);
+
+  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
   info.speech_level_dbfs = -60.0f;
   constexpr float kPcmSamples = 123.456f;
   VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
-  gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
-  gain_applier.Process(info, fake_audio_8k.float_frame_view());
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
+  helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
   EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
   VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
-  gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo);
-  gain_applier.Process(info, fake_audio_48k.float_frame_view());
+  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
+  helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
   EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
   EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
 }

diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
index ca3279e..81e7d29 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc

@@ -20,7 +20,14 @@
 namespace {
 
 float ClampLevelEstimateDbfs(float level_estimate_dbfs) {
-  return rtc::SafeClamp<float>(level_estimate_dbfs, -90.f, 30.f);
+  return rtc::SafeClamp<float>(level_estimate_dbfs, -90.0f, 30.0f);
+}
+
+// Returns the clamped initial speech level (dBFS) needed for the initial gain.
+float GetInitialSpeechLevelEstimateDbfs(
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config) {
+  return ClampLevelEstimateDbfs(-kSaturationProtectorInitialHeadroomDb -
+                                config.initial_gain_db - config.headroom_db);
 }
 
 }  // namespace
@@ -38,17 +45,13 @@
 }
 
 AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
-    ApmDataDumper* apm_data_dumper)
-    : AdaptiveModeLevelEstimator(
-          apm_data_dumper,
-          kDefaultLevelEstimatorAdjacentSpeechFramesThreshold) {}
-
-AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
     ApmDataDumper* apm_data_dumper,
-    int adjacent_speech_frames_threshold)
+    const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
     : apm_data_dumper_(apm_data_dumper),
-      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
-      level_dbfs_(ClampLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs)) {
+      initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
+      adjacent_speech_frames_threshold_(
+          config.adjacent_speech_frames_threshold),
+      level_dbfs_(initial_speech_level_dbfs_) {
   RTC_DCHECK(apm_data_dumper_);
   RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1);
   Reset();
@@ -128,14 +131,14 @@
 void AdaptiveModeLevelEstimator::Reset() {
   ResetLevelEstimatorState(preliminary_state_);
   ResetLevelEstimatorState(reliable_state_);
-  level_dbfs_ = ClampLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs);
+  level_dbfs_ = initial_speech_level_dbfs_;
   num_adjacent_speech_frames_ = 0;
 }
 
 void AdaptiveModeLevelEstimator::ResetLevelEstimatorState(
     LevelEstimatorState& state) const {
   state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
-  state.level_dbfs.numerator = kInitialSpeechLevelEstimateDbfs;
+  state.level_dbfs.numerator = initial_speech_level_dbfs_;
   state.level_dbfs.denominator = 1.0f;
 }
 

diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
index e39b6ce..e15c6af 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h

@@ -24,12 +24,12 @@
 // Level estimator for the digital adaptive gain controller.
 class AdaptiveModeLevelEstimator {
  public:
-  explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
+  AdaptiveModeLevelEstimator(
+      ApmDataDumper* apm_data_dumper,
+      const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
   AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete;
   AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) =
       delete;
-  AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper,
-                             int adjacent_speech_frames_threshold);
 
   // Updates the level estimation.
   void Update(const VadLevelAnalyzer::Result& vad_data);
@@ -63,6 +63,7 @@
 
   ApmDataDumper* const apm_data_dumper_;
 
+  const float initial_speech_level_dbfs_;
   const int adjacent_speech_frames_threshold_;
   LevelEstimatorState preliminary_state_;
   LevelEstimatorState reliable_state_;

diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
index c55950a..1cdd91d 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc

@@ -13,37 +13,22 @@
 #include <memory>
 
 #include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/include/audio_processing.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/gunit.h"
 
 namespace webrtc {
 namespace {
 
+using AdaptiveDigitalConfig =
+    AudioProcessing::Config::GainController2::AdaptiveDigital;
+
 // Number of speech frames that the level estimator must observe in order to
 // become confident about the estimated level.
 constexpr int kNumFramesToConfidence =
     kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
 static_assert(kNumFramesToConfidence > 0, "");
 
-// Fake levels and speech probabilities used in the tests.
-static_assert(kInitialSpeechLevelEstimateDbfs < 0.0f, "");
-constexpr float kVadLevelRms = kInitialSpeechLevelEstimateDbfs / 2.0f;
-constexpr float kVadLevelPeak = kInitialSpeechLevelEstimateDbfs / 3.0f;
-static_assert(kVadLevelRms < kVadLevelPeak, "");
-static_assert(kVadLevelRms > kInitialSpeechLevelEstimateDbfs, "");
-static_assert(kVadLevelRms - kInitialSpeechLevelEstimateDbfs > 5.0f,
-              "Adjust `kVadLevelRms` so that the difference from the initial "
-              "level is wide enough for the tests.");
-
-constexpr VadLevelAnalyzer::Result kVadDataSpeech{/*speech_probability=*/1.0f,
-                                                  kVadLevelRms, kVadLevelPeak};
-constexpr VadLevelAnalyzer::Result kVadDataNonSpeech{
-    /*speech_probability=*/kVadConfidenceThreshold / 2.0f, kVadLevelRms,
-    kVadLevelPeak};
-
-constexpr float kMinSpeechProbability = 0.0f;
-constexpr float kMaxSpeechProbability = 1.0f;
-
 constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;
 
 // Provides the `vad_level` value `num_iterations` times to `level_estimator`.
@@ -55,31 +40,51 @@
   }
 }
 
+constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig(
+    int adjacent_speech_frames_threshold) {
+  AdaptiveDigitalConfig config;
+  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold;
+  return config;
+}
+
 // Level estimator with data dumper.
 struct TestLevelEstimator {
-  TestLevelEstimator()
+  explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
       : data_dumper(0),
         estimator(std::make_unique<AdaptiveModeLevelEstimator>(
             &data_dumper,
-            /*adjacent_speech_frames_threshold=*/1)) {}
+            GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
+        initial_speech_level_dbfs(estimator->level_dbfs()),
+        vad_level_rms(initial_speech_level_dbfs / 2.0f),
+        vad_level_peak(initial_speech_level_dbfs / 3.0f),
+        vad_data_speech(
+            {/*speech_probability=*/1.0f, vad_level_rms, vad_level_peak}),
+        vad_data_non_speech(
+            {/*speech_probability=*/kVadConfidenceThreshold / 2.0f,
+             vad_level_rms, vad_level_peak}) {
+    RTC_DCHECK_LT(vad_level_rms, vad_level_peak);
+    RTC_DCHECK_LT(initial_speech_level_dbfs, vad_level_rms);
+    RTC_DCHECK_GT(vad_level_rms - initial_speech_level_dbfs, 5.0f)
+        << "Adjust `vad_level_rms` so that the difference from the initial "
+           "level is wide enough for the tests";
+  }
   ApmDataDumper data_dumper;
   std::unique_ptr<AdaptiveModeLevelEstimator> estimator;
+  const float initial_speech_level_dbfs;
+  const float vad_level_rms;
+  const float vad_level_peak;
+  const VadLevelAnalyzer::Result vad_data_speech;
+  const VadLevelAnalyzer::Result vad_data_non_speech;
 };
 
-// Checks the initially estimated level.
-TEST(GainController2AdaptiveModeLevelEstimator, CheckInitialEstimate) {
-  TestLevelEstimator level_estimator;
-  EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
-                  kInitialSpeechLevelEstimateDbfs);
-}
-
 // Checks that the level estimator converges to a constant input speech level.
 TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
-  TestLevelEstimator level_estimator;
-  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
+  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
+                     level_estimator.vad_data_speech,
                      *level_estimator.estimator);
   const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
-  RunOnConstantLevel(/*num_iterations=*/1, kVadDataSpeech,
+  RunOnConstantLevel(/*num_iterations=*/1, level_estimator.vad_data_speech,
                      *level_estimator.estimator);
   EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
               0.1f);
@@ -88,17 +93,19 @@
 // Checks that the level controller does not become confident when too few
 // speech frames are observed.
 TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
-  TestLevelEstimator level_estimator;
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
-                     kVadDataSpeech, *level_estimator.estimator);
+                     level_estimator.vad_data_speech,
+                     *level_estimator.estimator);
   EXPECT_FALSE(level_estimator.estimator->IsConfident());
 }
 
 // Checks that the level controller becomes confident when enough speech frames
 // are observed.
 TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
-  TestLevelEstimator level_estimator;
-  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
+  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
+                     level_estimator.vad_data_speech,
                      *level_estimator.estimator);
   EXPECT_TRUE(level_estimator.estimator->IsConfident());
 }
@@ -107,14 +114,15 @@
 // frames.
 TEST(GainController2AdaptiveModeLevelEstimator,
      EstimatorIgnoresNonSpeechFrames) {
-  TestLevelEstimator level_estimator;
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
   // Simulate speech.
-  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
+  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
+                     level_estimator.vad_data_speech,
                      *level_estimator.estimator);
   const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
   // Simulate full-scale non-speech.
   RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
-                     VadLevelAnalyzer::Result{kMinSpeechProbability,
+                     VadLevelAnalyzer::Result{/*speech_probability=*/0.0f,
                                               /*rms_dbfs=*/0.0f,
                                               /*peak_dbfs=*/0.0f},
                      *level_estimator.estimator);
@@ -126,28 +134,30 @@
 // Checks the convergence speed of the estimator before it becomes confident.
 TEST(GainController2AdaptiveModeLevelEstimator,
      ConvergenceSpeedBeforeConfidence) {
-  TestLevelEstimator level_estimator;
-  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
+  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
+                     level_estimator.vad_data_speech,
                      *level_estimator.estimator);
-  EXPECT_NEAR(level_estimator.estimator->level_dbfs(), kVadDataSpeech.rms_dbfs,
+  EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
+              level_estimator.vad_data_speech.rms_dbfs,
               kConvergenceSpeedTestsLevelTolerance);
 }
 
 // Checks the convergence speed of the estimator after it becomes confident.
 TEST(GainController2AdaptiveModeLevelEstimator,
      ConvergenceSpeedAfterConfidence) {
-  TestLevelEstimator level_estimator;
+  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
   // Reach confidence using the initial level estimate.
   RunOnConstantLevel(
       /*num_iterations=*/kNumFramesToConfidence,
       VadLevelAnalyzer::Result{
-          kMaxSpeechProbability,
-          /*rms_dbfs=*/kInitialSpeechLevelEstimateDbfs,
-          /*peak_dbfs=*/kInitialSpeechLevelEstimateDbfs + 6.0f},
+          /*speech_probability=*/1.0f,
+          /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs,
+          /*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f},
       *level_estimator.estimator);
   // No estimate change should occur, but confidence is achieved.
   ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
-                  kInitialSpeechLevelEstimateDbfs);
+                  level_estimator.initial_speech_level_dbfs);
   ASSERT_TRUE(level_estimator.estimator->IsConfident());
   // After confidence.
   constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600;  // 6 seconds.
@@ -155,8 +165,9 @@
       kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, "");
   RunOnConstantLevel(
       /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames,
-      kVadDataSpeech, *level_estimator.estimator);
-  EXPECT_NEAR(level_estimator.estimator->level_dbfs(), kVadDataSpeech.rms_dbfs,
+      level_estimator.vad_data_speech, *level_estimator.estimator);
+  EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
+              level_estimator.vad_data_speech.rms_dbfs,
               kConvergenceSpeedTestsLevelTolerance);
 }
 
@@ -168,30 +179,26 @@
 
 TEST_P(AdaptiveModeLevelEstimatorParametrization,
        DoNotAdaptToShortSpeechSegments) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveModeLevelEstimator level_estimator(
-      &apm_data_dumper, adjacent_speech_frames_threshold());
-  const float initial_level = level_estimator.level_dbfs();
-  ASSERT_LT(initial_level, kVadDataSpeech.peak_dbfs);
+  TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
+  const float initial_level = level_estimator.estimator->level_dbfs();
+  ASSERT_LT(initial_level, level_estimator.vad_data_speech.peak_dbfs);
   for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) {
     SCOPED_TRACE(i);
-    level_estimator.Update(kVadDataSpeech);
-    EXPECT_EQ(initial_level, level_estimator.level_dbfs());
+    level_estimator.estimator->Update(level_estimator.vad_data_speech);
+    EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
   }
-  level_estimator.Update(kVadDataNonSpeech);
-  EXPECT_EQ(initial_level, level_estimator.level_dbfs());
+  level_estimator.estimator->Update(level_estimator.vad_data_non_speech);
+  EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
 }
 
 TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveModeLevelEstimator level_estimator(
-      &apm_data_dumper, adjacent_speech_frames_threshold());
-  const float initial_level = level_estimator.level_dbfs();
-  ASSERT_LT(initial_level, kVadDataSpeech.peak_dbfs);
+  TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
+  const float initial_level = level_estimator.estimator->level_dbfs();
+  ASSERT_LT(initial_level, level_estimator.vad_data_speech.peak_dbfs);
   for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
-    level_estimator.Update(kVadDataSpeech);
+    level_estimator.estimator->Update(level_estimator.vad_data_speech);
   }
-  EXPECT_LT(initial_level, level_estimator.level_dbfs());
+  EXPECT_LT(initial_level, level_estimator.estimator->level_dbfs());
 }
 
 INSTANTIATE_TEST_SUITE_P(GainController2,

diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index da28d8d..4af8552 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h

@@ -24,38 +24,26 @@
 constexpr int kSubFramesInFrame = 20;
 constexpr int kMaximalNumberOfSamplesPerChannel = 480;
 
-// Adaptive digital gain applier settings below.
-constexpr float kHeadroomDbfs = 6.0f;
-constexpr float kMaxGainDb = 30.0f;
-constexpr float kInitialAdaptiveDigitalGainDb = 8.0f;
+// Adaptive digital gain applier settings.
+
 // At what limiter levels should we start decreasing the adaptive digital gain.
 constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
 
 // This is the threshold for speech. Speech frames are used to update the
 // speech level, measure the amount of speech, and decide when to allow target
-// gain reduction.
+// gain changes.
 constexpr float kVadConfidenceThreshold = 0.95f;
 
-// Adaptive digital level estimator parameters.
 // Number of milliseconds of speech frames to observe to make the estimator
 // confident.
 constexpr float kLevelEstimatorTimeToConfidenceMs = 400;
 constexpr float kLevelEstimatorLeakFactor =
     1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs;
 
-// Robust VAD probability and speech decisions.
-constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
-
 // Saturation Protector settings.
 constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f;
 constexpr int kSaturationProtectorBufferSize = 4;
 
-// Set the initial speech level estimate so that `kInitialAdaptiveDigitalGainDb`
-// is applied at the beginning of the call.
-constexpr float kInitialSpeechLevelEstimateDbfs =
-    -kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
-    kHeadroomDbfs;
-
 // Number of interpolation points for each region of the limiter.
 // These values have been tuned to limit the interpolated gain curve error given
 // the limiter parameters and allowing a maximum error of +/- 32768^-1.

diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index 100a3c0..436effd 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc

@@ -3107,6 +3107,18 @@
   b_adaptive.dry_run = a_adaptive.dry_run;
   EXPECT_EQ(a, b);
 
+  a_adaptive.headroom_db += 1.0f;
+  b_adaptive.headroom_db = a_adaptive.headroom_db;
+  EXPECT_EQ(a, b);
+
+  a_adaptive.max_gain_db += 1.0f;
+  b_adaptive.max_gain_db = a_adaptive.max_gain_db;
+  EXPECT_EQ(a, b);
+
+  a_adaptive.initial_gain_db += 1.0f;
+  b_adaptive.initial_gain_db = a_adaptive.initial_gain_db;
+  EXPECT_EQ(a, b);
+
   a_adaptive.vad_reset_period_ms++;
   b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms;
   EXPECT_EQ(a, b);
@@ -3164,6 +3176,18 @@
   EXPECT_NE(a, b);
   a_adaptive = b_adaptive;
 
+  a_adaptive.headroom_db += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
+  a_adaptive.max_gain_db += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
+  a_adaptive.initial_gain_db += 1.0f;
+  EXPECT_NE(a, b);
+  a_adaptive = b_adaptive;
+
   a_adaptive.vad_reset_period_ms++;
   EXPECT_NE(a, b);
   a_adaptive = b_adaptive;

diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc
index 195044a..8a22fed 100644
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc

@@ -105,7 +105,9 @@
     const AudioProcessing::Config::GainController2& config) {
   const auto& fixed = config.fixed_digital;
   const auto& adaptive = config.adaptive_digital;
-  return fixed.gain_db >= 0.f && fixed.gain_db < 50.f &&
+  return fixed.gain_db >= 0.0f && fixed.gain_db < 50.f &&
+         adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
+         adaptive.initial_gain_db >= 0.0f &&
          adaptive.max_gain_change_db_per_second > 0.0f &&
          adaptive.max_output_noise_level_dbfs <= 0.0f;
 }

diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc
index c8ee113..d1c1f5b 100644
--- a/modules/audio_processing/gain_controller2_unittest.cc
+++ b/modules/audio_processing/gain_controller2_unittest.cc

@@ -89,6 +89,36 @@
   EXPECT_TRUE(GainController2::Validate(config));
 }
 
+TEST(GainController2, CheckHeadroomDb) {
+  AudioProcessing::Config::GainController2 config;
+  config.adaptive_digital.headroom_db = -1.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.headroom_db = 0.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+  config.adaptive_digital.headroom_db = 5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckMaxGainDb) {
+  AudioProcessing::Config::GainController2 config;
+  config.adaptive_digital.max_gain_db = -1.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.max_gain_db = 0.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.max_gain_db = 5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
+TEST(GainController2, CheckInitialGainDb) {
+  AudioProcessing::Config::GainController2 config;
+  config.adaptive_digital.initial_gain_db = -1.0f;
+  EXPECT_FALSE(GainController2::Validate(config));
+  config.adaptive_digital.initial_gain_db = 0.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+  config.adaptive_digital.initial_gain_db = 5.0f;
+  EXPECT_TRUE(GainController2::Validate(config));
+}
+
 TEST(GainController2, CheckAdaptiveDigitalMaxGainChangeSpeedConfig) {
   AudioProcessing::Config::GainController2 config;
   config.adaptive_digital.max_gain_change_db_per_second = -1.0f;

diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc
index 2286196..ddd8078 100644
--- a/modules/audio_processing/include/audio_processing.cc
+++ b/modules/audio_processing/include/audio_processing.cc

@@ -90,6 +90,8 @@
 bool Agc2Config::AdaptiveDigital::operator==(
     const Agc2Config::AdaptiveDigital& rhs) const {
   return enabled == rhs.enabled && dry_run == rhs.dry_run &&
+         headroom_db == rhs.headroom_db && max_gain_db == rhs.max_gain_db &&
+         initial_gain_db == rhs.initial_gain_db &&
          vad_reset_period_ms == rhs.vad_reset_period_ms &&
          adjacent_speech_frames_threshold ==
              rhs.adjacent_speech_frames_threshold &&
@@ -197,6 +199,10 @@
       << " }, adaptive_digital: { enabled: "
       << gain_controller2.adaptive_digital.enabled
       << ", dry_run: " << gain_controller2.adaptive_digital.dry_run
+      << ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
+      << ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
+      << ", initial_gain_db: "
+      << gain_controller2.adaptive_digital.initial_gain_db
       << ", vad_reset_period_ms: "
       << gain_controller2.adaptive_digital.vad_reset_period_ms
       << ", adjacent_speech_frames_threshold: "

diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 8f07c6e..121e430 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h

@@ -367,12 +367,19 @@
         }
 
         bool enabled = false;
-        // Run the adaptive digital controller but the signal is not modified.
+        // When true, the adaptive digital controller runs but the signal is not
+        // modified.
         bool dry_run = false;
+        float headroom_db = 6.0f;
+        // TODO(bugs.webrtc.org/7494): Consider removing and inferring from
+        // `max_output_noise_level_dbfs`.
+        float max_gain_db = 30.0f;
+        float initial_gain_db = 8.0f;
         int vad_reset_period_ms = 1500;
         int adjacent_speech_frames_threshold = 12;
         float max_gain_change_db_per_second = 3.0f;
         float max_output_noise_level_dbfs = -50.0f;
+        // TODO(bugs.webrtc.org/7494): Replace with field trials.
         bool sse2_allowed = true;
         bool avx2_allowed = true;
         bool neon_allowed = true;
commit	a850e6c8b6776f17e2c85124206a7310060ceb2e	[log] [tgz]
author	Alessio Bazzica <alessiob@webrtc.org>	Mon Oct 04 11:35:55 2021
committer	WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com>	Mon Oct 04 16:11:00 2021
tree	4ec47829c93139bc151d894c1f4a1a0b30be423f
parent	41b4397e1a2c770b261126bf8664c542bf3b3f07 [diff]