AEC3: Avoid entering non-linear mode when the filter is slightly diverged

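This CL changes the behavior when the main filter diverges.
Instead of entering non-linear mode, the AEC continues to operate in
linear mode but estimates the residual echo differently: while the
filter is diverged, the residual echo power R2 is set to the linear
echo power estimate S2 scaled by a factor of 10, instead of S2 divided
by the ERLE. The new behavior can be disabled with the
WebRTC-Aec3LinearModeWithDivergedFilterKillSwitch field trial.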

Bug: chromium:857018,webrtc:9462
Change-Id: I41212efe164ad319cf38a163cdf9d3ea151e0997
Reviewed-on: https://webrtc-review.googlesource.com/85981
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23772}
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index c6198b8..9c6314e 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -38,6 +38,11 @@
       "WebRTC-Aec3EnforceDelayAfterRealignmentKillSwitch");
 }
 
+bool EnableLinearModeWithDivergedFilter() {
+  return !field_trial::IsEnabled(
+      "WebRTC-Aec3LinearModeWithDivergedFilterKillSwitch");
+}
+
 float ComputeGainRampupIncrease(const EchoCanceller3Config& config) {
   const auto& c = config.echo_removal_control.gain_rampup;
   return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks);
@@ -59,6 +64,8 @@
           EnableStationaryRenderImprovements() &&
           config_.echo_audibility.use_stationary_properties),
       enforce_delay_after_realignment_(EnableEnforcingDelayAfterRealignment()),
+      allow_linear_mode_with_diverged_filter_(
+          EnableLinearModeWithDivergedFilter()),
       erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
       max_render_(config_.filter.main.length_blocks, 0.f),
       reverb_decay_(fabsf(config_.ep_strength.default_len)),
@@ -77,6 +84,7 @@
     filter_analyzer_.Reset();
     blocks_since_last_saturation_ = 0;
     usable_linear_estimate_ = false;
+    diverged_linear_filter_ = false;
     capture_signal_saturation_ = false;
     echo_saturation_ = false;
     std::fill(max_render_.begin(), max_render_.end(), 0.f);
@@ -269,10 +277,13 @@
   if (!config_.echo_removal_control.linear_and_stable_echo_path) {
     usable_linear_estimate_ =
         usable_linear_estimate_ && recently_converged_filter;
-    usable_linear_estimate_ = usable_linear_estimate_ && !diverged_filter;
+    if (!allow_linear_mode_with_diverged_filter_) {
+      usable_linear_estimate_ = usable_linear_estimate_ && !diverged_filter;
+    }
   }
 
   use_linear_filter_output_ = usable_linear_estimate_ && !TransparentMode();
+  diverged_linear_filter_ = diverged_filter;
 
   UpdateReverb(adaptive_filter_impulse_response);
 
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index d7d4d8c..5c90128 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -69,6 +69,14 @@
     return erle_estimator_.Erle();
   }
 
+  // Returns 10 if linear mode is kept with a diverged filter, else nullopt.
+  absl::optional<float> ErleUncertainty() const {
+    if (allow_linear_mode_with_diverged_filter_ && diverged_linear_filter_) {
+      return 10.f;
+    }
+    return absl::nullopt;
+  }
+
   // Returns the time-domain ERLE.
   float ErleTimeDomain() const { return erle_estimator_.ErleTimeDomain(); }
 
@@ -159,6 +167,7 @@
   const bool allow_transparent_mode_;
   const bool use_stationary_properties_;
   const bool enforce_delay_after_realignment_;
+  const bool allow_linear_mode_with_diverged_filter_;
   ErlEstimator erl_estimator_;
   ErleEstimator erle_estimator_;
   size_t capture_block_counter_ = 0;
@@ -166,6 +175,7 @@
   size_t blocks_with_proper_filter_adaptation_ = 0;
   size_t blocks_with_active_render_ = 0;
   bool usable_linear_estimate_ = false;
+  bool diverged_linear_filter_ = false;
   bool capture_signal_saturation_ = false;
   bool echo_saturation_ = false;
   bool transparent_mode_ = false;
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index eaa81e8..f51fbb3 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -100,7 +100,8 @@
   // Estimate the residual echo power.
   if (aec_state.UsableLinearEstimate()) {
     RTC_DCHECK(!aec_state.SaturatedEcho());
-    LinearEstimate(S2_linear, aec_state.Erle(), R2);
+    LinearEstimate(S2_linear, aec_state.Erle(), aec_state.ErleUncertainty(),
+                   R2);
     // Adds the estimated unmodelled echo power to the residual echo power
     // estimate.
     if (echo_reverb_) {
@@ -203,13 +204,20 @@
 void ResidualEchoEstimator::LinearEstimate(
     const std::array<float, kFftLengthBy2Plus1>& S2_linear,
     const std::array<float, kFftLengthBy2Plus1>& erle,
+    absl::optional<float> erle_uncertainty,
     std::array<float, kFftLengthBy2Plus1>* R2) {
   std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f);
-  std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
-                 [](float a, float b) {
-                   RTC_DCHECK_LT(0.f, a);
-                   return b / a;
-                 });
+  if (erle_uncertainty) {
+    for (size_t k = 0; k < R2->size(); ++k) {
+      (*R2)[k] = S2_linear[k] * *erle_uncertainty;
+    }
+  } else {
+    std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
+                   [](float a, float b) {
+                     RTC_DCHECK_LT(0.f, a);
+                     return b / a;
+                   });
+  }
 }
 
 void ResidualEchoEstimator::NonLinearEstimate(
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index 5e2affe..cea9e04 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -56,6 +56,7 @@
   // (ERLE) and the linear power estimate.
   void LinearEstimate(const std::array<float, kFftLengthBy2Plus1>& S2_linear,
                       const std::array<float, kFftLengthBy2Plus1>& erle,
+                      absl::optional<float> erle_uncertainty,
                       std::array<float, kFftLengthBy2Plus1>* R2);
 
   // Estimates the residual echo power based on the estimate of the echo path