AEC3: Prevent diverging coarse filter from influencing the refined filter

After the refined filter has been determined to perform better than
the coarse filter, and the coefficients of the coarse filter have been
overwritten by those of the refined filter, at least 100 ms have to
pass before the adaptation of the refined filter is allowed to speed up
due to good coarse filter performance.

This change solves the vicious circle described in webrtc:12265, where
the coarse and refined filters can diverge over time.

This feature can be disabled remotely via the kill-switch field trial
WebRTC-Aec3CoarseFilterResetHangoverKillSwitch. When disabled, the AEC
output is bit-exact with the output before this change.

Bug: webrtc:12265,chromium:1155477
Change-Id: Iacd6e325e987dd8a475bb3e8163fee714c65b20a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/196501
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32801}
diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc
index aeb809e..5f1923e 100644
--- a/api/audio/echo_canceller3_config.cc
+++ b/api/audio/echo_canceller3_config.cc
@@ -153,6 +153,7 @@
 
   res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
   res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
+  res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 2500);
 
   res = res & Limit(&c->erle.min, 1.f, 100000.f);
   res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 3ed11ff..55281af 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -86,6 +86,7 @@
 
     size_t config_change_duration_blocks = 250;
     float initial_state_seconds = 2.5f;
+    int coarse_reset_hangover_blocks = 25;
     bool conservative_initial_phase = false;
     bool enable_coarse_filter_output_usage = true;
     bool use_linear_filter = true;
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index 907b472..9d10da9 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -223,6 +223,8 @@
               &cfg.filter.config_change_duration_blocks);
     ReadParam(section, "initial_state_seconds",
               &cfg.filter.initial_state_seconds);
+    ReadParam(section, "coarse_reset_hangover_blocks",
+              &cfg.filter.coarse_reset_hangover_blocks);
     ReadParam(section, "conservative_initial_phase",
               &cfg.filter.conservative_initial_phase);
     ReadParam(section, "enable_coarse_filter_output_usage",
@@ -502,6 +504,8 @@
       << config.filter.config_change_duration_blocks << ",";
   ost << "\"initial_state_seconds\": " << config.filter.initial_state_seconds
       << ",";
+  ost << "\"coarse_reset_hangover_blocks\": "
+      << config.filter.coarse_reset_hangover_blocks << ",";
   ost << "\"conservative_initial_phase\": "
       << (config.filter.conservative_initial_phase ? "true" : "false") << ",";
   ost << "\"enable_coarse_filter_output_usage\": "
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index c736109..5b31e3c 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -322,6 +322,11 @@
                         external_delay ? 1 : 0);
   data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
                         GetReverbFrequencyResponse());
+  data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2);
+  data_dumper_->DumpRaw("aec3_subtractor_e2_coarse",
+                        subtractor_output[0].e2_coarse);
+  data_dumper_->DumpRaw("aec3_subtractor_e2_refined",
+                        subtractor_output[0].e2_refined);
 }
 
 AecState::InitialState::InitialState(const EchoCanceller3Config& config)
diff --git a/modules/audio_processing/aec3/refined_filter_update_gain.cc b/modules/audio_processing/aec3/refined_filter_update_gain.cc
index 138329a..db5203d 100644
--- a/modules/audio_processing/aec3/refined_filter_update_gain.cc
+++ b/modules/audio_processing/aec3/refined_filter_update_gain.cc
@@ -73,6 +73,7 @@
     rtc::ArrayView<const float> erl,
     size_t size_partitions,
     bool saturated_capture_signal,
+    bool disallow_leakage_diverged,
     FftData* gain_fft) {
   RTC_DCHECK(gain_fft);
   // Introducing shorter notation to improve readability.
@@ -125,7 +126,7 @@
 
   // H_error = H_error + factor * erl.
   for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
-    if (E2_coarse[k] >= E2_refined[k]) {
+    if (E2_refined[k] <= E2_coarse[k] || disallow_leakage_diverged) {
       H_error_[k] += current_config_.leakage_converged * erl[k];
     } else {
       H_error_[k] += current_config_.leakage_diverged * erl[k];
diff --git a/modules/audio_processing/aec3/refined_filter_update_gain.h b/modules/audio_processing/aec3/refined_filter_update_gain.h
index 5730979..ae4fe84 100644
--- a/modules/audio_processing/aec3/refined_filter_update_gain.h
+++ b/modules/audio_processing/aec3/refined_filter_update_gain.h
@@ -51,6 +51,7 @@
                rtc::ArrayView<const float> erl,
                size_t size_partitions,
                bool saturated_capture_signal,
+               bool disallow_leakage_diverged,
                FftData* gain_fft);
 
   // Sets a new config.
diff --git a/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc
index 2393fdd..6fce858 100644
--- a/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/refined_filter_update_gain_unittest.cc
@@ -196,7 +196,8 @@
     std::array<float, kFftLengthBy2Plus1> erl;
     ComputeErl(optimization, H2[0], erl);
     refined_gain.Compute(render_power, render_signal_analyzer, output[0], erl,
-                         refined_filter.SizePartitions(), saturation, &G);
+                         refined_filter.SizePartitions(), saturation, false,
+                         &G);
     refined_filter.Adapt(*render_delay_buffer->GetRenderBuffer(), G, &h[0]);
 
     // Update the delay.
@@ -247,7 +248,7 @@
   erl.fill(0.f);
   EXPECT_DEATH(
       gain.Compute(render_power, analyzer, output, erl,
-                   config.filter.refined.length_blocks, false, nullptr),
+                   config.filter.refined.length_blocks, false, false, nullptr),
       "");
 }
 
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index d152299..d10e4ff 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -19,11 +19,17 @@
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
 
 namespace {
 
+bool UseCoarseFilterResetHangover() {
+  return !field_trial::IsEnabled(
+      "WebRTC-Aec3CoarseFilterResetHangoverKillSwitch");
+}
+
 void PredictionError(const Aec3Fft& fft,
                      const FftData& S,
                      rtc::ArrayView<const float> y,
@@ -66,12 +72,14 @@
       optimization_(optimization),
       config_(config),
       num_capture_channels_(num_capture_channels),
+      use_coarse_filter_reset_hangover_(UseCoarseFilterResetHangover()),
       refined_filters_(num_capture_channels_),
       coarse_filter_(num_capture_channels_),
       refined_gains_(num_capture_channels_),
       coarse_gains_(num_capture_channels_),
       filter_misadjustment_estimators_(num_capture_channels_),
       poor_coarse_filter_counters_(num_capture_channels_, 0),
+      coarse_filter_reset_hangover_(num_capture_channels_, 0),
       refined_frequency_responses_(
           num_capture_channels_,
           std::vector<std::array<float, kFftLengthBy2Plus1>>(
@@ -228,11 +236,19 @@
 
     // Update the refined filter.
     if (!refined_filters_adjusted) {
+      // Do not allow the performance of the coarse filter to affect the
+      // adaptation speed of the refined filter just after the coarse filter has
+      // been reset.
+      const bool disallow_leakage_diverged =
+          coarse_filter_reset_hangover_[ch] > 0 &&
+          use_coarse_filter_reset_hangover_;
+
       std::array<float, kFftLengthBy2Plus1> erl;
       ComputeErl(optimization_, refined_frequency_responses_[ch], erl);
       refined_gains_[ch]->Compute(X2_refined, render_signal_analyzer, output,
                                   erl, refined_filters_[ch]->SizePartitions(),
-                                  aec_state.SaturatedCapture(), &G);
+                                  aec_state.SaturatedCapture(),
+                                  disallow_leakage_diverged, &G);
     } else {
       G.re.fill(0.f);
       G.im.fill(0.f);
@@ -256,6 +272,8 @@
       coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_coarse,
                                  coarse_filter_[ch]->SizePartitions(),
                                  aec_state.SaturatedCapture(), &G);
+      coarse_filter_reset_hangover_[ch] =
+          std::max(coarse_filter_reset_hangover_[ch] - 1, 0);
     } else {
       poor_coarse_filter_counters_[ch] = 0;
       coarse_filter_[ch]->SetFilter(refined_filters_[ch]->SizePartitions(),
@@ -263,6 +281,8 @@
       coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_refined,
                                  coarse_filter_[ch]->SizePartitions(),
                                  aec_state.SaturatedCapture(), &G);
+      coarse_filter_reset_hangover_[ch] =
+          config_.filter.coarse_reset_hangover_blocks;
     }
 
     coarse_filter_[ch]->Adapt(render_buffer, G);
diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h
index 42ca372..560f656 100644
--- a/modules/audio_processing/aec3/subtractor.h
+++ b/modules/audio_processing/aec3/subtractor.h
@@ -120,6 +120,7 @@
   const Aec3Optimization optimization_;
   const EchoCanceller3Config config_;
   const size_t num_capture_channels_;
+  const bool use_coarse_filter_reset_hangover_;
 
   std::vector<std::unique_ptr<AdaptiveFirFilter>> refined_filters_;
   std::vector<std::unique_ptr<AdaptiveFirFilter>> coarse_filter_;
@@ -127,6 +128,7 @@
   std::vector<std::unique_ptr<CoarseFilterUpdateGain>> coarse_gains_;
   std::vector<FilterMisadjustmentEstimator> filter_misadjustment_estimators_;
   std::vector<size_t> poor_coarse_filter_counters_;
+  std::vector<int> coarse_filter_reset_hangover_;
   std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
       refined_frequency_responses_;
   std::vector<std::vector<float>> refined_impulse_responses_;
diff --git a/resources/audio_processing/output_data_float.pb.sha1 b/resources/audio_processing/output_data_float.pb.sha1
index 6c3ab91..c895b96 100644
--- a/resources/audio_processing/output_data_float.pb.sha1
+++ b/resources/audio_processing/output_data_float.pb.sha1
@@ -1 +1 @@
-ed1172c80a1a001a8aa7ac0680a99018cbb7d278
\ No newline at end of file
+365a02046fdb30357d649e73766d2f6eb2b33677
\ No newline at end of file
diff --git a/resources/audio_processing/output_data_float_avx2.pb.sha1 b/resources/audio_processing/output_data_float_avx2.pb.sha1
index 2d4ad0c..12ec621 100644
--- a/resources/audio_processing/output_data_float_avx2.pb.sha1
+++ b/resources/audio_processing/output_data_float_avx2.pb.sha1
@@ -1 +1 @@
-a1dd718a6882bf8033a934e5beec73086cc91240
\ No newline at end of file
+847035cbe0bc7ee0620c32fa5ac857cc5b2c7ec4
\ No newline at end of file