AEC3: Prevent transparent mode from leaking low volume echo

This change makes the transparent mode classifier also consider filter
convergence for microphone signals with very low volume, in order to
prevent entering transparent mode when there is low, but audible, echo.

Furthermore, the suppression gain during transparent mode is restored
(kDefaultTransparentModeGain goes from 0 back to 0.01) to avoid leaks
when the echo is too low to be reliably detected by filter convergence.

Bug: webrtc:10232, chromium:1140452
Change-Id: Idd4f40c4aee7c20baa444afaa5ec604eb65bcfd0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/189786
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32461}
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index df56c3a..c736109 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -197,8 +197,10 @@
 
   // Analyze the filter outputs and filters.
   bool any_filter_converged;
+  bool any_coarse_filter_converged;
   bool all_filters_diverged;
   subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
+                                     &any_coarse_filter_converged,
                                      &all_filters_diverged);
 
   bool any_filter_consistent;
@@ -272,10 +274,10 @@
 
   // Detect whether the transparent mode should be activated.
   if (transparent_state_) {
-    transparent_state_->Update(delay_state_.MinDirectPathFilterDelay(),
-                               any_filter_consistent, any_filter_converged,
-                               all_filters_diverged, active_render,
-                               SaturatedCapture());
+    transparent_state_->Update(
+        delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
+        any_filter_converged, any_coarse_filter_converged, all_filters_diverged,
+        active_render, SaturatedCapture());
   }
 
   // Analyze the quality of the filter.
@@ -312,6 +314,8 @@
   data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
   data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
   data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
+  data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
+                        any_coarse_filter_converged);
   data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
 
   data_dumper_->DumpRaw("aec3_external_delay_avaliable",
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 46db233..e352cf5 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -23,15 +23,10 @@
 namespace webrtc {
 namespace {
 
-constexpr float kDefaultTransparentModeGain = 0.f;
+constexpr float kDefaultTransparentModeGain = 0.01f;
 
 float GetTransparentModeGain() {
-  if (field_trial::IsEnabled(
-          "WebRTC-Aec3NoSuppressionInTransparentModeKillSwitch")) {
-    return 0.01f;
-  } else {
-    return kDefaultTransparentModeGain;
-  }
+  return kDefaultTransparentModeGain;
 }
 
 float GetEarlyReflectionsDefaultModeGain(
diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.cc b/modules/audio_processing/aec3/subtractor_output_analyzer.cc
index 8b22185..baf0600 100644
--- a/modules/audio_processing/aec3/subtractor_output_analyzer.cc
+++ b/modules/audio_processing/aec3/subtractor_output_analyzer.cc
@@ -22,12 +22,14 @@
 void SubtractorOutputAnalyzer::Update(
     rtc::ArrayView<const SubtractorOutput> subtractor_output,
     bool* any_filter_converged,
+    bool* any_coarse_filter_converged,
     bool* all_filters_diverged) {
   RTC_DCHECK(any_filter_converged);
   RTC_DCHECK(all_filters_diverged);
   RTC_DCHECK_EQ(subtractor_output.size(), filters_converged_.size());
 
   *any_filter_converged = false;
+  *any_coarse_filter_converged = false;
   *all_filters_diverged = true;
 
   for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
@@ -36,16 +38,21 @@
     const float e2_coarse = subtractor_output[ch].e2_coarse;
 
     constexpr float kConvergenceThreshold = 50 * 50 * kBlockSize;
+    constexpr float kConvergenceThresholdLowLevel = 20 * 20 * kBlockSize;
     bool refined_filter_converged =
         e2_refined < 0.5f * y2 && y2 > kConvergenceThreshold;
-    bool coarse_filter_converged =
+    bool coarse_filter_converged_strict =
         e2_coarse < 0.05f * y2 && y2 > kConvergenceThreshold;
+    bool coarse_filter_converged_relaxed =
+        e2_coarse < 0.2f * y2 && y2 > kConvergenceThresholdLowLevel;
     float min_e2 = std::min(e2_refined, e2_coarse);
     bool filter_diverged = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize;
     filters_converged_[ch] =
-        refined_filter_converged || coarse_filter_converged;
+        refined_filter_converged || coarse_filter_converged_strict;
 
     *any_filter_converged = *any_filter_converged || filters_converged_[ch];
+    *any_coarse_filter_converged =
+        *any_coarse_filter_converged || coarse_filter_converged_relaxed;
     *all_filters_diverged = *all_filters_diverged && filter_diverged;
   }
 }
diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.h b/modules/audio_processing/aec3/subtractor_output_analyzer.h
index 5328ae7..32707db 100644
--- a/modules/audio_processing/aec3/subtractor_output_analyzer.h
+++ b/modules/audio_processing/aec3/subtractor_output_analyzer.h
@@ -26,6 +26,7 @@
   // Analyses the subtractor output.
   void Update(rtc::ArrayView<const SubtractorOutput> subtractor_output,
               bool* any_filter_converged,
+              bool* any_coarse_filter_converged,
               bool* all_filters_diverged);
 
   const std::vector<bool>& ConvergedFilters() const {
diff --git a/modules/audio_processing/aec3/transparent_mode.cc b/modules/audio_processing/aec3/transparent_mode.cc
index 1820e16..3ed0980 100644
--- a/modules/audio_processing/aec3/transparent_mode.cc
+++ b/modules/audio_processing/aec3/transparent_mode.cc
@@ -46,6 +46,7 @@
   void Update(int filter_delay_blocks,
               bool any_filter_consistent,
               bool any_filter_converged,
+              bool any_coarse_filter_converged,
               bool all_filters_diverged,
               bool active_render,
               bool saturated_capture) override {
@@ -56,9 +57,9 @@
     // there is no echo present in the microphone signal.
 
     // The constants have been obtained by observing active_render and
-    // any_filter_converged under varying call scenarios. They have further been
-    // hand tuned to prefer normal state during uncertain regions (to avoid echo
-    // leaks).
+    // any_coarse_filter_converged under varying call scenarios. They
+    // have further been hand tuned to prefer normal state during uncertain
+    // regions (to avoid echo leaks).
 
     // The model is only updated during active render.
     if (!active_render)
@@ -69,8 +70,8 @@
 
     // Probability of observing converged filters in states "normal" and
     // "transparent" during active render.
-    constexpr float kConvergedNormal = 0.03f;
-    constexpr float kConvergedTransparent = 0.005f;
+    constexpr float kConvergedNormal = 0.01f;
+    constexpr float kConvergedTransparent = 0.001f;
 
     // Probability of transitioning to transparent state from normal state and
     // transparent state respectively.
@@ -92,7 +93,7 @@
     const float prob_transition_normal = 1.f - prob_transition_transparent;
 
     // Observed output.
-    const int out = any_filter_converged;
+    const int out = static_cast<int>(any_coarse_filter_converged);
 
     // Joint probabilites of the observed output and respective states.
     const float prob_joint_normal = prob_transition_normal * kB[0][out];
@@ -142,6 +143,7 @@
   void Update(int filter_delay_blocks,
               bool any_filter_consistent,
               bool any_filter_converged,
+              bool any_coarse_filter_converged,
               bool all_filters_diverged,
               bool active_render,
               bool saturated_capture) override {
diff --git a/modules/audio_processing/aec3/transparent_mode.h b/modules/audio_processing/aec3/transparent_mode.h
index b1be69b..bc5dd03 100644
--- a/modules/audio_processing/aec3/transparent_mode.h
+++ b/modules/audio_processing/aec3/transparent_mode.h
@@ -37,6 +37,7 @@
   virtual void Update(int filter_delay_blocks,
                       bool any_filter_consistent,
                       bool any_filter_converged,
+                      bool any_coarse_filter_converged,
                       bool all_filters_diverged,
                       bool active_render,
                       bool saturated_capture) = 0;