Add a specific AEC3 behavior for setups with known clock-drift

TBR=gustaf@webrtc.org

Change-Id: I9c726fc8e1b010255a1bee166c99fe6cb75d7658
Bug: chromium:826655,webrtc:9079
Reviewed-on: https://webrtc-review.googlesource.com/64982
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22657}
diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc
index d74d7a8..805a765 100644
--- a/api/audio/echo_canceller3_config.cc
+++ b/api/audio/echo_canceller3_config.cc
@@ -12,5 +12,7 @@
 namespace webrtc {
 
 EchoCanceller3Config::EchoCanceller3Config() = default;
+EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
+    default;
 
 }  // namespace webrtc
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 41b26d0..cf45255 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -18,6 +18,7 @@
 // Configuration struct for EchoCanceller3
 struct EchoCanceller3Config {
   EchoCanceller3Config();
+  EchoCanceller3Config(const EchoCanceller3Config& e);
   struct Delay {
     size_t default_delay = 5;
     size_t down_sampling_factor = 4;
@@ -119,6 +120,18 @@
 
     bool has_clock_drift = false;
   } echo_removal_control;
+
+  struct EchoModel {  // Tunables read by the residual echo estimator.
+    size_t noise_floor_hold = 50;  // Updates before the floor may grow.
+    float min_noise_floor_power = 1638400.f;  // Floor reset/growth minimum.
+    float stationary_gate_slope = 10.f;  // Noise-floor multiple cut from X2.
+    float noise_gate_power = 27509.42f;  // Soft noise-gate threshold.
+    float noise_gate_slope = 0.3f;  // Soft noise-gate slope below threshold.
+    size_t render_pre_window_size = 1;  // Blocks before the filter delay.
+    size_t render_post_window_size = 3;  // Blocks after the filter delay.
+    float nonlinear_hold = 2;  // Hold-counter limit for max(R2, R2_old).
+    float nonlinear_release = 0.1f;  // Weight of R2_old after the hold.
+  } echo_model;
 };
 }  // namespace webrtc
 
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index f0cbbc8..a0cf4f9 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -13,6 +13,7 @@
 
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/atomicops.h"
+#include "rtc_base/logging.h"
 
 namespace webrtc {
 
@@ -29,6 +30,43 @@
   return false;
 }
 
+// Returns a copy of |config| with dependent parameters overridden as needed.
+EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
+  EchoCanceller3Config adjusted_cfg = config;
+
+  // Only the clock-drift case is adjusted; other configs are returned as is.
+  if (config.echo_removal_control.has_clock_drift) {
+    RTC_LOG(LS_WARNING)
+        << "Customizing parameters to work well for the clock-drift case.";
+    if (config.ep_strength.bounded_erl) {  // Tune ep_strength and echo_model.
+      adjusted_cfg.ep_strength.default_len = 0.85f;
+      adjusted_cfg.ep_strength.lf = 0.01f;
+      adjusted_cfg.ep_strength.mf = 0.01f;
+      adjusted_cfg.ep_strength.hf = 0.01f;
+      adjusted_cfg.echo_model.render_pre_window_size = 1;
+      adjusted_cfg.echo_model.render_post_window_size = 1;
+      adjusted_cfg.echo_model.nonlinear_hold = 3;
+      adjusted_cfg.echo_model.nonlinear_release = 0.001f;
+    } else {  // Also enables bounded_erl and overrides delay/filter settings.
+      adjusted_cfg.ep_strength.bounded_erl = true;
+      adjusted_cfg.delay.down_sampling_factor = 2;
+      adjusted_cfg.ep_strength.default_len = 0.8f;
+      adjusted_cfg.ep_strength.lf = 0.01f;
+      adjusted_cfg.ep_strength.mf = 0.01f;
+      adjusted_cfg.ep_strength.hf = 0.01f;
+      adjusted_cfg.filter.main = {30, 0.1f, 0.8f, 0.001f, 20075344.f};
+      adjusted_cfg.filter.shadow = {30, 0.7f, 20075344.f};
+      adjusted_cfg.filter.main_initial = {30, 0.1f, 1.5f, 0.001f, 20075344.f};
+      adjusted_cfg.filter.shadow_initial = {30, 0.9f, 20075344.f};
+      adjusted_cfg.echo_model.render_pre_window_size = 2;
+      adjusted_cfg.echo_model.render_post_window_size = 2;
+      adjusted_cfg.echo_model.nonlinear_hold = 3;
+      adjusted_cfg.echo_model.nonlinear_release = 0.6f;
+    }
+  }
+  return adjusted_cfg;
+}
+
 void FillSubFrameView(AudioBuffer* frame,
                       size_t sub_frame_index,
                       std::vector<rtc::ArrayView<float>>* sub_frame_view) {
@@ -209,11 +247,12 @@
 EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                                int sample_rate_hz,
                                bool use_highpass_filter)
-    : EchoCanceller3(config,
-                     sample_rate_hz,
-                     use_highpass_filter,
-                     std::unique_ptr<BlockProcessor>(
-                         BlockProcessor::Create(config, sample_rate_hz))) {}
+    : EchoCanceller3(
+          AdjustConfig(config),  // NOTE(review): run twice; may log twice.
+          sample_rate_hz,
+          use_highpass_filter,
+          std::unique_ptr<BlockProcessor>(
+              BlockProcessor::Create(AdjustConfig(config), sample_rate_hz))) {}
 EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                                int sample_rate_hz,
                                bool use_highpass_filter,
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index bf7e427..f534817 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -17,65 +17,6 @@
 #include "rtc_base/checks.h"
 
 namespace webrtc {
-namespace {
-
-// Estimates the echo generating signal power as gated maximal power over a time
-// window.
-void EchoGeneratingPower(const RenderBuffer& render_buffer,
-                         size_t min_delay,
-                         size_t max_delay,
-                         std::array<float, kFftLengthBy2Plus1>* X2) {
-  X2->fill(0.f);
-  for (size_t k = min_delay; k <= max_delay; ++k) {
-    std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
-                   X2->begin(),
-                   [](float a, float b) { return std::max(a, b); });
-  }
-
-  // Apply soft noise gate of -78 dBFS.
-  static constexpr float kNoiseGatePower = 27509.42f;
-  std::for_each(X2->begin(), X2->end(), [](float& a) {
-    if (kNoiseGatePower > a) {
-      a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
-    }
-  });
-}
-
-constexpr int kNoiseFloorCounterMax = 50;
-constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f;
-
-// Updates estimate for the power of the stationary noise component in the
-// render signal.
-void RenderNoisePower(
-    const RenderBuffer& render_buffer,
-    std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
-    std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) {
-  RTC_DCHECK(X2_noise_floor);
-  RTC_DCHECK(X2_noise_floor_counter);
-
-  const auto render_power = render_buffer.Spectrum(0);
-  RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
-  RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
-
-  // Estimate the stationary noise power in a minimum statistics manner.
-  for (size_t k = 0; k < render_power.size(); ++k) {
-    // Decrease rapidly.
-    if (render_power[k] < (*X2_noise_floor)[k]) {
-      (*X2_noise_floor)[k] = render_power[k];
-      (*X2_noise_floor_counter)[k] = 0;
-    } else {
-      // Increase in a delayed, leaky manner.
-      if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) {
-        (*X2_noise_floor)[k] =
-            std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin);
-      } else {
-        ++(*X2_noise_floor_counter)[k];
-      }
-    }
-  }
-}
-
-}  // namespace
 
 ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config)
     : config_(config), S2_old_(config_.filter.main.length_blocks) {
@@ -112,15 +53,21 @@
     std::array<float, kFftLengthBy2Plus1> X2;
 
     // Computes the spectral power over the blocks surrounding the delay.
-    EchoGeneratingPower(render_buffer,
-                        std::max(0, aec_state.FilterDelayBlocks() - 1),
-                        aec_state.FilterDelayBlocks() + 3, &X2);
+    size_t window_start = std::max(
+        0, aec_state.FilterDelayBlocks() -
+               static_cast<int>(config_.echo_model.render_pre_window_size));
+    size_t window_end =
+        aec_state.FilterDelayBlocks() +
+        static_cast<int>(config_.echo_model.render_post_window_size);
+    EchoGeneratingPower(render_buffer, window_start, window_end, &X2);
 
     // Subtract the stationary noise power to avoid stationary noise causing
     // excessive echo suppression.
-    std::transform(
-        X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
-        [](float a, float b) { return std::max(0.f, a - 10.f * b); });
+    std::transform(X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
+                   [&](float a, float b) {
+                     return std::max(
+                         0.f, a - config_.echo_model.stationary_gate_slope * b);
+                   });
 
     NonLinearEstimate(aec_state.SaturatedEcho(), aec_state.EchoPathGain(), X2,
                       Y2, R2);
@@ -144,8 +91,8 @@
 }
 
 void ResidualEchoEstimator::Reset() {
-  X2_noise_floor_counter_.fill(kNoiseFloorCounterMax);
-  X2_noise_floor_.fill(kNoiseFloorMin);
+  X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold);
+  X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
   R2_reverb_.fill(0.f);
   R2_old_.fill(0.f);
   R2_hold_counter_.fill(0.f);
@@ -186,9 +133,12 @@
 
     // Compute the residual echo by holding a maximum echo powers and an echo
     // fading corresponding to a room with an RT60 value of about 50 ms.
-    (*R2)[k] = R2_hold_counter_[k] < 2
-                   ? std::max((*R2)[k], R2_old_[k])
-                   : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]);
+    (*R2)[k] =
+        R2_hold_counter_[k] < config_.echo_model.nonlinear_hold
+            ? std::max((*R2)[k], R2_old_[k])
+            : std::min(
+                  (*R2)[k] + R2_old_[k] * config_.echo_model.nonlinear_release,
+                  Y2[k]);
   }
 }
 
@@ -233,4 +183,56 @@
                  std::plus<float>());
 }
 
+void ResidualEchoEstimator::EchoGeneratingPower(
+    const RenderBuffer& render_buffer,
+    size_t min_delay,
+    size_t max_delay,
+    std::array<float, kFftLengthBy2Plus1>* X2) const {
+  X2->fill(0.f);  // Element-wise running maximum starts at zero.
+  for (size_t k = min_delay; k <= max_delay; ++k) {  // Inclusive window.
+    std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
+                   X2->begin(),
+                   [](float a, float b) { return std::max(a, b); });
+  }
+
+  // Apply soft noise gate: values below the gate are reduced, clamped at 0.
+  std::for_each(X2->begin(), X2->end(), [&](float& a) {
+    if (config_.echo_model.noise_gate_power > a) {
+      a = std::max(0.f, a - config_.echo_model.noise_gate_slope *
+                                (config_.echo_model.noise_gate_power - a));
+    }
+  });
+}
+
+void ResidualEchoEstimator::RenderNoisePower(
+    const RenderBuffer& render_buffer,
+    std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
+    std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const {
+  RTC_DCHECK(X2_noise_floor);  // Both outputs are updated in place.
+  RTC_DCHECK(X2_noise_floor_counter);
+
+  const auto render_power = render_buffer.Spectrum(0);
+  RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
+  RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
+
+  // Estimate the stationary noise power in a minimum statistics manner.
+  for (size_t k = 0; k < render_power.size(); ++k) {
+    // Decrease rapidly: track any new minimum and restart the hold count.
+    if (render_power[k] < (*X2_noise_floor)[k]) {
+      (*X2_noise_floor)[k] = render_power[k];
+      (*X2_noise_floor_counter)[k] = 0;
+    } else {
+      // Increase in a delayed, leaky manner: grow 10% once the hold elapses.
+      if ((*X2_noise_floor_counter)[k] >=
+          static_cast<int>(config_.echo_model.noise_floor_hold)) {
+        (*X2_noise_floor)[k] =
+            std::max((*X2_noise_floor)[k] * 1.1f,
+                     config_.echo_model.min_noise_floor_power);  // Lower bound.
+      } else {
+        ++(*X2_noise_floor_counter)[k];  // Still holding; count this update.
+      }
+    }
+  }
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index 3758114..1222d54 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -61,6 +61,21 @@
                      size_t delay,
                      float reverb_decay_factor,
                      std::array<float, kFftLengthBy2Plus1>* R2);
+
+  // Estimates the echo generating signal power as gated maximal power over a
+  // time window.
+  void EchoGeneratingPower(const RenderBuffer& render_buffer,
+                           size_t min_delay,
+                           size_t max_delay,
+                           std::array<float, kFftLengthBy2Plus1>* X2) const;
+
+  // Updates estimate for the power of the stationary noise component in the
+  // render signal.
+  void RenderNoisePower(
+      const RenderBuffer& render_buffer,
+      std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
+      std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const;
+
   const EchoCanceller3Config config_;
   std::array<float, kFftLengthBy2Plus1> R2_old_;
   std::array<int, kFftLengthBy2Plus1> R2_hold_counter_;
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index f7b538b..d281964 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -248,6 +248,23 @@
               &cfg.echo_removal_control.has_clock_drift);
   }
 
+  if (rtc::GetValueFromJsonObject(root, "echo_model", &section)) {
+    // All echo_model parameters are flat, so no nested subsection is needed.
+    ReadParam(section, "noise_floor_hold", &cfg.echo_model.noise_floor_hold);
+    ReadParam(section, "min_noise_floor_power",
+              &cfg.echo_model.min_noise_floor_power);
+    ReadParam(section, "stationary_gate_slope",
+              &cfg.echo_model.stationary_gate_slope);
+    ReadParam(section, "noise_gate_power", &cfg.echo_model.noise_gate_power);
+    ReadParam(section, "noise_gate_slope", &cfg.echo_model.noise_gate_slope);
+    ReadParam(section, "render_pre_window_size",
+              &cfg.echo_model.render_pre_window_size);
+    ReadParam(section, "render_post_window_size",
+              &cfg.echo_model.render_post_window_size);
+    ReadParam(section, "nonlinear_hold", &cfg.echo_model.nonlinear_hold);
+    ReadParam(section, "nonlinear_release", &cfg.echo_model.nonlinear_release);
+  }
+
   std::cout << std::endl;
   return cfg;
 }