AGC2: max adaptation speed now part of config

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ie65a2e2139cff0bd730307d06b74760e307c9568
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186264
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32277}

commit: 29ef556aff963d4f1d4d2a69aa52d60170d8233e [log] [tgz]
author: Alessio Bazzica <alessiob@webrtc.org> Thu Oct 01 14:57:45 2020
committer: Commit Bot <commit-bot@chromium.org> Thu Oct 01 16:15:28 2020
tree: 2f76443532336e249e52dbbdc87138256ee53133
parent: dba4db5668d6dd95475dc4e8aa42de3011b4a15e [diff]
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 9349436..380c39c 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc

@@ -28,11 +28,16 @@
   dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
 }
 
+constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
+constexpr float kMaxGainChangePerSecondDb = 3.f;
+
 }  // namespace
 
 AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
     : speech_level_estimator_(apm_data_dumper),
-      gain_applier_(apm_data_dumper),
+      gain_applier_(apm_data_dumper,
+                    kGainApplierAdjacentSpeechFramesThreshold,
+                    kMaxGainChangePerSecondDb),
       apm_data_dumper_(apm_data_dumper),
       noise_level_estimator_(apm_data_dumper) {
   RTC_DCHECK(apm_data_dumper);
@@ -48,9 +53,10 @@
           config.adaptive_digital.initial_saturation_margin_db,
           config.adaptive_digital.extra_saturation_margin_db),
       vad_(config.adaptive_digital.vad_probability_attack),
-      gain_applier_(apm_data_dumper,
-                    config.adaptive_digital
-                        .gain_applier_adjacent_speech_frames_threshold),
+      gain_applier_(
+          apm_data_dumper,
+          config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
+          config.adaptive_digital.max_gain_change_db_per_second),
       apm_data_dumper_(apm_data_dumper),
       noise_level_estimator_(apm_data_dumper) {
   RTC_DCHECK(apm_data_dumper);

diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h
index e2decb8..f3c7854 100644
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h

@@ -26,6 +26,7 @@
 class AdaptiveAgc {
  public:
   explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
+  // TODO(crbug.com/webrtc/7494): Remove ctor above.
   AdaptiveAgc(ApmDataDumper* apm_data_dumper,
               const AudioProcessing::Config::GainController2& config);
   ~AdaptiveAgc();

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
index e4e7886..ef048e6 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc

@@ -74,35 +74,33 @@
 // Return the gain difference in db to 'last_gain_db'.
 float ComputeGainChangeThisFrameDb(float target_gain_db,
                                    float last_gain_db,
-                                   bool gain_increase_allowed) {
+                                   bool gain_increase_allowed,
+                                   float max_gain_change_db) {
   float target_gain_difference_db = target_gain_db - last_gain_db;
   if (!gain_increase_allowed) {
     target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
   }
-
-  return rtc::SafeClamp(target_gain_difference_db, -kMaxGainChangePerFrameDb,
-                        kMaxGainChangePerFrameDb);
+  return rtc::SafeClamp(target_gain_difference_db, -max_gain_change_db,
+                        max_gain_change_db);
 }
-}  // namespace
 
-// TODO(crbug.com/webrtc/7494): Remove ctor and the constant used below.
-AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
-    ApmDataDumper* apm_data_dumper)
-    : AdaptiveDigitalGainApplier(
-          apm_data_dumper,
-          kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold) {}
+}  // namespace
 
 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
     ApmDataDumper* apm_data_dumper,
-    int adjacent_speech_frames_threshold)
+    int adjacent_speech_frames_threshold,
+    float max_gain_change_db_per_second)
     : apm_data_dumper_(apm_data_dumper),
       gain_applier_(
           /*hard_clip_samples=*/false,
           /*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
       adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
+      max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
+                                   kFrameDurationMs / 1000.f),
       calls_since_last_gain_log_(0),
       frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
       last_gain_db_(kInitialAdaptiveDigitalGainDb) {
+  RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
   RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
 }
 
@@ -110,7 +108,11 @@
                                          AudioFrameView<float> frame) {
   RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
   RTC_DCHECK_GE(frame.num_channels(), 1);
-  RTC_DCHECK_GE(frame.samples_per_channel(), 1);
+  RTC_DCHECK(
+      frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
+      frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
+      << "`frame` does not look like a 10 ms frame for an APM supported sample "
+         "rate";
 
   // Log every second.
   calls_since_last_gain_log_++;
@@ -137,7 +139,8 @@
 
   const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
       target_gain_db, last_gain_db_,
-      /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0);
+      /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0,
+      max_gain_change_db_per_10ms_);
 
   apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db",
                             target_gain_db - last_gain_db_);

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index ad3f39c..ca36abc 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h

@@ -34,16 +34,18 @@
     bool estimate_is_confident;
   };
 
-  explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
-  // Ctor. `adjacent_speech_frames_threshold` indicates how many speech frames
-  // are required before a gain increase is allowed.
+  // `adjacent_speech_frames_threshold` indicates how many speech frames are
+  // required before a gain increase is allowed. `max_gain_change_db_per_second`
+  // limits the adaptation speed (applied uniformly across frames).
   AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
-                             int adjacent_speech_frames_threshold);
+                             int adjacent_speech_frames_threshold,
+                             float max_gain_change_db_per_second);
   AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
   AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
       delete;
 
-  // Analyzes `info`, updates the digital gain and applies it to `frame`.
+  // Analyzes `info`, updates the digital gain and applies it to a 10 ms
+  // `frame`. Supports any sample rate supported by APM.
   void Process(const FrameInfo& info, AudioFrameView<float> frame);
 
  private:
@@ -51,6 +53,7 @@
   GainApplier gain_applier_;
 
   const int adjacent_speech_frames_threshold_;
+  const float max_gain_change_db_per_10ms_;
 
   int calls_since_last_gain_log_;
   int frames_to_gain_increase_allowed_;

diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
index c8fb6ca..63763c8 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc

@@ -23,6 +23,7 @@
 
 constexpr int kMono = 1;
 constexpr int kStereo = 2;
+constexpr int kFrameLen10ms8kHz = 80;
 constexpr int kFrameLen10ms48kHz = 480;
 
 // Constants used in place of estimated noise levels.
@@ -32,6 +33,21 @@
               "");
 constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};
 
+constexpr float kMaxGainChangePerSecondDb = 3.f;
+constexpr float kMaxGainChangePerFrameDb =
+    kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
+
+// Helper to instantiate `AdaptiveDigitalGainApplier`.
+struct GainApplierHelper {
+  GainApplierHelper()
+      : apm_data_dumper(0),
+        gain_applier(&apm_data_dumper,
+                     /*adjacent_speech_frames_threshold=*/1,
+                     kMaxGainChangePerSecondDb) {}
+  ApmDataDumper apm_data_dumper;
+  AdaptiveDigitalGainApplier gain_applier;
+};
+
 // Runs gain applier and returns the applied gain in linear scale.
 float RunOnConstantLevel(int num_iterations,
                          VadLevelAnalyzer::Result vad_level,
@@ -40,7 +56,7 @@
   float gain_linear = 0.f;
 
   for (int i = 0; i < num_iterations; ++i) {
-    VectorFloatFrame fake_audio(kMono, 1, 1.f);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
     AdaptiveDigitalGainApplier::FrameInfo info;
     info.input_level_dbfs = input_level_dbfs;
     info.input_noise_level_dbfs = kNoNoiseDbfs;
@@ -62,25 +78,22 @@
     /*estimate_is_confident=*/true};
 
 TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
-
+  GainApplierHelper helper;
   // Make one call with reasonable audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
   AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
   info.input_level_dbfs = -5.0;
-  gain_applier.Process(kFrameInfo, fake_audio.float_frame_view());
+  helper.gain_applier.Process(kFrameInfo, fake_audio.float_frame_view());
 }
 
 // Check that the output is -kHeadroom dBFS.
 TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;
 
   constexpr float initial_level_dbfs = -5.f;
 
-  const float applied_gain =
-      RunOnConstantLevel(200, kVadSpeech, initial_level_dbfs, &gain_applier);
+  const float applied_gain = RunOnConstantLevel(
+      200, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
 
   EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs),
               0.1f);
@@ -88,8 +101,7 @@
 
 // Check that the output is -kHeadroom dBFS
 TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;
 
   constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f;
   // A few extra frames for safety.
@@ -97,7 +109,7 @@
       static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
 
   const float applied_gain = RunOnConstantLevel(
-      kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &gain_applier);
+      kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
   EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f);
 
   const float applied_gain_db = 20.f * std::log10(applied_gain);
@@ -105,8 +117,7 @@
 }
 
 TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;
 
   constexpr float initial_level_dbfs = -25.f;
   // A few extra frames for safety.
@@ -118,10 +129,10 @@
   float last_gain_linear = 1.f;
   for (int i = 0; i < kNumFramesToAdapt; ++i) {
     SCOPED_TRACE(i);
-    VectorFloatFrame fake_audio(kMono, 1, 1.f);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
     AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
     info.input_level_dbfs = initial_level_dbfs;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());
     float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
     EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
               kMaxChangePerFrameLinear);
@@ -131,10 +142,10 @@
   // Check that the same is true when gain decreases as well.
   for (int i = 0; i < kNumFramesToAdapt; ++i) {
     SCOPED_TRACE(i);
-    VectorFloatFrame fake_audio(kMono, 1, 1.f);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
     AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
     info.input_level_dbfs = 0.f;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());
     float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
     EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
               kMaxChangePerFrameLinear);
@@ -143,15 +154,14 @@
 }
 
 TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;
 
   constexpr float initial_level_dbfs = -25.f;
 
   VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
   AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
   info.input_level_dbfs = initial_level_dbfs;
-  gain_applier.Process(info, fake_audio.float_frame_view());
+  helper.gain_applier.Process(info, fake_audio.float_frame_view());
   float maximal_difference = 0.f;
   float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
   for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@@ -168,8 +178,7 @@
 }
 
 TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;
 
   constexpr float initial_level_dbfs = -25.f;
   constexpr int num_initial_frames =
@@ -183,7 +192,7 @@
     AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
     info.input_level_dbfs = initial_level_dbfs;
     info.input_noise_level_dbfs = kWithNoiseDbfs;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());
 
     // Wait so that the adaptive gain applier has time to lower the gain.
     if (i > num_initial_frames) {
@@ -197,19 +206,17 @@
 }
 
 TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;
 
   // Make one call with positive audio level values and settings.
   VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
   AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
   info.input_level_dbfs = 5.f;
-  gain_applier.Process(info, fake_audio.float_frame_view());
+  helper.gain_applier.Process(info, fake_audio.float_frame_view());
 }
 
 TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;
 
   constexpr float initial_level_dbfs = -25.f;
   constexpr int num_initial_frames =
@@ -224,7 +231,7 @@
     info.input_level_dbfs = initial_level_dbfs;
     info.limiter_envelope_dbfs = 1.f;
     info.estimate_is_confident = false;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());
 
     // Wait so that the adaptive gain applier has time to lower the gain.
     if (i > num_initial_frames) {
@@ -247,7 +254,8 @@
   const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
   ApmDataDumper apm_data_dumper(0);
   AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
-                                          adjacent_speech_frames_threshold);
+                                          adjacent_speech_frames_threshold,
+                                          kMaxGainChangePerFrameDb);
   AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
   info.input_level_dbfs = -25.0;
 
@@ -268,7 +276,8 @@
   const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
   ApmDataDumper apm_data_dumper(0);
   AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
-                                          adjacent_speech_frames_threshold);
+                                          adjacent_speech_frames_threshold,
+                                          kMaxGainChangePerFrameDb);
   AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
   info.input_level_dbfs = -25.0;
 

diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 30880e2..5149ed6 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h

@@ -26,9 +26,6 @@
 constexpr float kAttackFilterConstant = 0.f;
 
 // Adaptive digital gain applier settings below.
-constexpr float kMaxGainChangePerSecondDb = 3.f;
-constexpr float kMaxGainChangePerFrameDb =
-    kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
 constexpr float kHeadroomDbfs = 1.f;
 constexpr float kMaxGainDb = 30.f;
 constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
@@ -51,7 +48,6 @@
 
 // Robust VAD probability and speech decisions.
 constexpr float kDefaultSmoothedVadProbabilityAttack = 1.f;
-constexpr int kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold = 1;
 constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 1;
 
 // Saturation Protector settings.

diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index e96a45b..a56b315 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h

@@ -353,6 +353,7 @@
         float initial_saturation_margin_db = 20.f;
         float extra_saturation_margin_db = 2.f;
         int gain_applier_adjacent_speech_frames_threshold = 1;
+        float max_gain_change_db_per_second = 3.f;
       } adaptive_digital;
     } gain_controller2;
commit	29ef556aff963d4f1d4d2a69aa52d60170d8233e	[log] [tgz]
author	Alessio Bazzica <alessiob@webrtc.org>	Thu Oct 01 14:57:45 2020
committer	Commit Bot <commit-bot@chromium.org>	Thu Oct 01 16:15:28 2020
tree	2f76443532336e249e52dbbdc87138256ee53133
parent	dba4db5668d6dd95475dc4e8aa42de3011b4a15e [diff]