Echo canceller 3 improvements for setups with headsets. This CL improves the echo cancellation performance on setups where headsets are used (systems with such low echo path gain that no correlation between the render and capture signals can be found) in 4 ways: 1) The echo path gain for systems with headsets is assumed to be nonzero. 2) The stationary component of the render power is not included in the nonlinear echo power estimate. 3) The behavior after echo path gain changes is made less cautious. 4) The detection of systems with headsets is made more rapid. BUG=chromium:712651, webrtc:6018 Review-Url: https://codereview.webrtc.org/2823903003 Cr-Commit-Position: refs/heads/master@{#17768}

commit: e52a203a56c7876bc226c04464563555433fa0c1 [log] [tgz]
author: peah <peah@webrtc.org> Wed Apr 19 16:03:40 2017
committer: Commit bot <commit-bot@chromium.org> Wed Apr 19 16:03:40 2017
tree: 5faf1cbb045fe777c5304596ffa2631955a5c55b
parent: d5c77abbaa893f0940acf584ab1abfab1f14345f [diff]
diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc
index 01c3c44..de4a0c1 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state.cc
+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc

@@ -22,7 +22,7 @@
 namespace webrtc {
 namespace {
 
-constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond;
+constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
 constexpr size_t kSaturationLeakageBlocks = 20;
 
 // Computes delay of the adaptive filter.
@@ -89,7 +89,6 @@
     const EchoPathVariability& echo_path_variability) {
   if (echo_path_variability.AudioPathChanged()) {
     blocks_since_last_saturation_ = 0;
-    active_render_blocks_ = 0;
     usable_linear_estimate_ = false;
     echo_leakage_detected_ = false;
     capture_signal_saturation_ = false;
@@ -98,6 +97,8 @@
 
     if (echo_path_variability.delay_change) {
       force_zero_gain_counter_ = 0;
+      blocks_with_filter_adaptation_ = 0;
+      render_received_ = false;
       force_zero_gain_ = true;
       echo_path_change_counter_ = kEchoPathChangeCounterMax;
     }
@@ -121,7 +122,11 @@
   // Update counters.
   const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
   const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;
-  active_render_blocks_ += active_render_block ? 1 : 0;
+  if (active_render_block) {
+    render_received_ = true;
+  }
+  blocks_with_filter_adaptation_ +=
+      (active_render_block && (!SaturatedCapture()) ? 1 : 0);
   --echo_path_change_counter_;
 
   // Force zero echo suppression gain after an echo path change to allow at
@@ -145,6 +150,8 @@
   }
 
   // Detect and flag echo saturation.
+  // TODO(peah): Add the delay in this computation to ensure that the render and
+  // capture signals are properly aligned.
   RTC_DCHECK_LT(0, x.size());
   const float max_sample = fabs(*std::max_element(
       x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
@@ -160,14 +167,17 @@
   // Flag whether the linear filter estimate is usable.
   usable_linear_estimate_ =
       (!echo_saturation_) &&
-      active_render_blocks_ > kEchoPathChangeConvergenceBlocks &&
+      (!render_received_ ||
+       blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&
       filter_delay_ && echo_path_change_counter_ <= 0;
 
   // After an amount of active render samples for which an echo should have been
   // detected in the capture signal if the ERL was not infinite, flag that a
   // headset is used.
-  headset_detected_ = !external_delay_ && !filter_delay_ &&
-                      active_render_blocks_ >= kEchoPathChangeConvergenceBlocks;
+  headset_detected_ =
+      !external_delay_ && !filter_delay_ &&
+      (!render_received_ ||
+       blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);
 }
 
 }  // namespace webrtc

diff --git a/webrtc/modules/audio_processing/aec3/aec_state.h b/webrtc/modules/audio_processing/aec3/aec_state.h
index 387c6ea..519665f 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state.h
+++ b/webrtc/modules/audio_processing/aec3/aec_state.h

@@ -41,7 +41,8 @@
   bool EchoLeakageDetected() const { return echo_leakage_detected_; }
 
   // Returns whether the render signal is currently active.
-  bool ActiveRender() const { return active_render_blocks_ > 200; }
+  // TODO(peah): Deprecate this in an upcoming CL.
+  bool ActiveRender() const { return blocks_with_filter_adaptation_ > 200; }
 
   // Returns the ERLE.
   const std::array<float, kFftLengthBy2Plus1>& Erle() const {
@@ -99,7 +100,7 @@
   ErlEstimator erl_estimator_;
   ErleEstimator erle_estimator_;
   int echo_path_change_counter_;
-  size_t active_render_blocks_ = 0;
+  size_t blocks_with_filter_adaptation_ = 0;
   bool usable_linear_estimate_ = false;
   bool echo_leakage_detected_ = false;
   bool capture_signal_saturation_ = false;
@@ -107,6 +108,7 @@
   bool headset_detected_ = false;
   float previous_max_sample_ = 0.f;
   bool force_zero_gain_ = false;
+  bool render_received_ = false;
   size_t force_zero_gain_counter_ = 0;
   rtc::Optional<size_t> filter_delay_;
   rtc::Optional<size_t> external_delay_;

diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
index 0a9ecac..d36720f 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc

@@ -40,6 +40,43 @@
   });
 }
 
+constexpr int kNoiseFloorCounterMax = 50;
+constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f;
+
+// Updates estimate for the power of the stationary noise component in the
+// render signal.
+void RenderNoisePower(
+    const RenderBuffer& render_buffer,
+    std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
+    std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) {
+  RTC_DCHECK(X2_noise_floor);
+  RTC_DCHECK(X2_noise_floor_counter);
+
+  const auto render_power = render_buffer.Spectrum(0);
+  RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
+  RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
+
+  // Estimate the stationary noise power in a minimum statistics manner.
+  for (size_t k = 0; k < render_power.size(); ++k) {
+    // Decrease rapidly.
+    if (render_power[k] < (*X2_noise_floor)[k]) {
+      (*X2_noise_floor)[k] = render_power[k];
+      (*X2_noise_floor_counter)[k] = 0;
+    } else {
+      // Increase in a delayed, leaky manner.
+      if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) {
+        (*X2_noise_floor)[k] =
+            std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin);
+      } else {
+        ++(*X2_noise_floor_counter)[k];
+      }
+    }
+  }
+}
+
+// Assume a minimum echo path gain of -33 dB for headsets.
+constexpr float kHeadsetEchoPathGain = 0.0005f;
+
 }  // namespace
 
 ResidualEchoEstimator::ResidualEchoEstimator() {
@@ -57,28 +94,19 @@
     std::array<float, kFftLengthBy2Plus1>* R2) {
   RTC_DCHECK(R2);
 
-  // Return zero residual echo power when a headset is detected.
-  if (aec_state.HeadsetDetected()) {
-    if (!headset_detected_cached_) {
-      Reset();
-      headset_detected_cached_ = true;
-    }
-    R2->fill(0.f);
-    return;
-  } else {
-    headset_detected_cached_ = false;
-  }
-
   const rtc::Optional<size_t> delay =
       aec_state.FilterDelay()
           ? aec_state.FilterDelay()
           : (aec_state.ExternalDelay() ? aec_state.ExternalDelay()
                                        : rtc::Optional<size_t>());
 
+  // Estimate the power of the stationary noise in the render signal.
+  RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
+
   // Estimate the residual echo power.
   const bool use_linear_echo_power =
       aec_state.UsableLinearEstimate() && using_subtractor_output;
-  if (use_linear_echo_power) {
+  if (use_linear_echo_power && !aec_state.HeadsetDetected()) {
     RTC_DCHECK(aec_state.FilterDelay());
     const int filter_delay = *aec_state.FilterDelay();
     LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
@@ -102,7 +130,15 @@
                           kResidualEchoPowerRenderWindowSize - 1, &X2);
     }
 
-    NonLinearEstimate(X2, Y2, R2);
+    // Subtract the stationary noise power to avoid stationary noise causing
+    // excessive echo suppression.
+    std::transform(
+        X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
+        [](float a, float b) { return std::max(0.f, a - 10.f * b); });
+
+    NonLinearEstimate(
+        aec_state.HeadsetDetected() ? kHeadsetEchoPathGain : kFixedEchoPathGain,
+        X2, Y2, R2);
     AddEchoReverb(*R2, aec_state.SaturatedEcho(),
                   std::min(static_cast<size_t>(kAdaptiveFilterLength),
                            delay.value_or(kAdaptiveFilterLength)),
@@ -119,6 +155,8 @@
 }
 
 void ResidualEchoEstimator::Reset() {
+  X2_noise_floor_counter_.fill(kNoiseFloorCounterMax);
+  X2_noise_floor_.fill(kNoiseFloorMin);
   R2_reverb_.fill(0.f);
   R2_old_.fill(0.f);
   R2_hold_counter_.fill(0.f);
@@ -141,14 +179,13 @@
 }
 
 void ResidualEchoEstimator::NonLinearEstimate(
+    float echo_path_gain,
     const std::array<float, kFftLengthBy2Plus1>& X2,
     const std::array<float, kFftLengthBy2Plus1>& Y2,
     std::array<float, kFftLengthBy2Plus1>* R2) {
   // Compute preliminary residual echo.
-  // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to
-  // 20 dB.
   std::transform(X2.begin(), X2.end(), R2->begin(),
-                 [](float a) { return a * kFixedEchoPathGain; });
+                 [echo_path_gain](float a) { return a * echo_path_gain; });
 
   for (size_t k = 0; k < R2->size(); ++k) {
     // Update hold counter.

diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
index 6c8a7b2..c8e6a28 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h

@@ -48,7 +48,8 @@
 
   // Estimates the residual echo power based on the estimate of the echo path
   // gain.
-  void NonLinearEstimate(const std::array<float, kFftLengthBy2Plus1>& X2,
+  void NonLinearEstimate(float echo_path_gain,
+                         const std::array<float, kFftLengthBy2Plus1>& X2,
                          const std::array<float, kFftLengthBy2Plus1>& Y2,
                          std::array<float, kFftLengthBy2Plus1>* R2);
 
@@ -66,7 +67,8 @@
   int S2_old_index_ = 0;
   std::array<std::array<float, kFftLengthBy2Plus1>, kAdaptiveFilterLength>
       S2_old_;
-  bool headset_detected_cached_ = false;
+  std::array<float, kFftLengthBy2Plus1> X2_noise_floor_;
+  std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_;
 
   RTC_DISALLOW_COPY_AND_ASSIGN(ResidualEchoEstimator);
 };
commit	e52a203a56c7876bc226c04464563555433fa0c1	[log] [tgz]
author	peah <peah@webrtc.org>	Wed Apr 19 16:03:40 2017
committer	Commit bot <commit-bot@chromium.org>	Wed Apr 19 16:03:40 2017
tree	5faf1cbb045fe777c5304596ffa2631955a5c55b
parent	d5c77abbaa893f0940acf584ab1abfab1f14345f [diff]