Transparency improvements in the echo canceller 3 This CL adds two changes: -Adaptive adjustment of the echo suppression to cover both the case when the echo path covers the room well and the case when it does not. -Identification of the case when the echo is too low to be audible and adaptive handling of this case in the echo suppression. BUG=webrtc:7519, webrtc:7956,webrtc:7957 Review-Url: https://codereview.webrtc.org/2974583004 Cr-Commit-Position: refs/heads/master@{#18962}

commit: 2910357621dee4368bd3eaa0040cec82ac230dad [log] [tgz]
author: peah <peah@webrtc.org> Tue Jul 11 09:54:02 2017
committer: Commit Bot <commit-bot@chromium.org> Tue Jul 11 09:54:02 2017
tree: 512ef6314bea9fcb03d5b7e8eb0fadbe9f274dad
parent: 863f03ba38f397c078931d2bd27d40a4d60efc02 [diff]
diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
index b372df5..43cc901 100644
--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
+++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc

@@ -25,22 +25,6 @@
 
 namespace webrtc {
 
-namespace {
-
-// Constrains the a partiton of the frequency domain filter to be limited in
-// time via setting the relevant time-domain coefficients to zero.
-void Constrain(const Aec3Fft& fft, FftData* H) {
-  std::array<float, kFftLength> h;
-  fft.Ifft(*H, &h);
-  constexpr float kScale = 1.0f / kFftLengthBy2;
-  std::for_each(h.begin(), h.begin() + kFftLengthBy2,
-                [kScale](float& a) { a *= kScale; });
-  std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
-  fft.Fft(&h, H);
-}
-
-}  // namespace
-
 namespace aec3 {
 
 // Computes and stores the frequency response of the filter.
@@ -434,6 +418,7 @@
       H2_(size_partitions, std::array<float, kFftLengthBy2Plus1>()) {
   RTC_DCHECK(data_dumper_);
 
+  h_.fill(0.f);
   for (auto& H_j : H_) {
     H_j.Clear();
   }
@@ -446,6 +431,7 @@
 AdaptiveFirFilter::~AdaptiveFirFilter() = default;
 
 void AdaptiveFirFilter::HandleEchoPathChange() {
+  h_.fill(0.f);
   for (auto& H_j : H_) {
     H_j.Clear();
   }
@@ -493,10 +479,7 @@
   }
 
   // Constrain the filter partitions in a cyclic manner.
-  Constrain(fft_, &H_[partition_to_constrain_]);
-  partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1)
-                                ? partition_to_constrain_ + 1
-                                : 0;
+  Constrain();
 
   // Update the frequency response and echo return loss for the filter.
   switch (optimization_) {
@@ -518,4 +501,25 @@
   }
 }
 
+// Constrains one partition of the frequency domain filter to be limited in
+// time by setting the relevant time-domain coefficients to zero, and stores
+// the retained time-domain coefficients in the impulse response estimate. The
+// partitions are processed in a cyclic manner, one per call.
+void AdaptiveFirFilter::Constrain() {
+  // h_ has a fixed length whereas the number of partitions is a constructor
+  // argument, so verify that the partition to store fits inside h_.
+  RTC_DCHECK_LE((partition_to_constrain_ + 1) * kFftLengthBy2, h_.size());
+
+  std::array<float, kFftLength> h;
+  fft_.Ifft(H_[partition_to_constrain_], &h);
+
+  // Scale the first half of the time-domain data and zero the second half.
+  constexpr float kScale = 1.0f / kFftLengthBy2;
+  std::for_each(h.begin(), h.begin() + kFftLengthBy2,
+                [kScale](float& a) { a *= kScale; });
+  std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
+
+  // Keep the constrained coefficients as this partition's segment of h_.
+  std::copy(h.begin(), h.begin() + kFftLengthBy2,
+            h_.begin() + partition_to_constrain_ * kFftLengthBy2);
+
+  fft_.Fft(&h, &H_[partition_to_constrain_]);
+
+  // Advance to the next partition, wrapping around after the last one.
+  partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1)
+                                ? partition_to_constrain_ + 1
+                                : 0;
+}
+
 }  // namespace webrtc

diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
index d77cbca..6fae158 100644
--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
+++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h

@@ -119,6 +119,12 @@
     return H2_;
   }
 
+  // Returns the impulse response estimate (h_) maintained by Constrain().
+  const std::array<float, kAdaptiveFilterTimeDomainLength>&
+  FilterImpulseResponse() const {
+    return h_;
+  }
+
   void DumpFilter(const char* name) {
     for (auto& H : H_) {
       data_dumper_->DumpRaw(name, H.re);
@@ -127,11 +133,15 @@
   }
 
  private:
+  // Constrain the filter partitions in a cyclic manner.
+  void Constrain();
+
   ApmDataDumper* const data_dumper_;
   const Aec3Fft fft_;
   const Aec3Optimization optimization_;
   std::vector<FftData> H_;
   std::vector<std::array<float, kFftLengthBy2Plus1>> H2_;
+  std::array<float, kAdaptiveFilterTimeDomainLength> h_;
   std::array<float, kFftLengthBy2Plus1> erl_;
   size_t partition_to_constrain_ = 0;
 

diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index 474174c..32b20a4 100644
--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc

@@ -308,7 +308,8 @@
   AecState aec_state(0.f);
   RenderSignalAnalyzer render_signal_analyzer;
   std::vector<float> e(kBlockSize, 0.f);
-  std::array<float, kFftLength> s;
+  std::array<float, kFftLength> s_scratch;
+  std::array<float, kBlockSize> s;
   FftData S;
   FftData G;
   FftData E;
@@ -348,20 +349,24 @@
       render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
 
       filter.Filter(render_buffer, &S);
-      fft.Ifft(S, &s);
-      std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e.begin(),
+      fft.Ifft(S, &s_scratch);
+      std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+                     e.begin(),
                      [&](float a, float b) { return a - b * kScale; });
       std::for_each(e.begin(), e.end(),
                     [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
       fft.ZeroPaddedFft(e, &E);
+      for (size_t k = 0; k < kBlockSize; ++k) {
+        s[k] = kScale * s_scratch[k + kFftLengthBy2];
+      }
 
       gain.Compute(render_buffer, render_signal_analyzer, E,
                    filter.SizePartitions(), false, &G);
       filter.Adapt(render_buffer, G);
       aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
       aec_state.Update(filter.FilterFrequencyResponse(),
-                       rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
-                       x[0], false);
+                       filter.FilterImpulseResponse(), rtc::Optional<size_t>(),
+                       render_buffer, E2_main, Y2, x[0], s, false);
     }
     // Verify that the filter is able to perform well.
     EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),

diff --git a/webrtc/modules/audio_processing/aec3/aec3_common.h b/webrtc/modules/audio_processing/aec3/aec3_common.h
index 04b86e9..e6cabb4 100644
--- a/webrtc/modules/audio_processing/aec3/aec3_common.h
+++ b/webrtc/modules/audio_processing/aec3/aec3_common.h

@@ -33,14 +33,16 @@
 constexpr int kMetricsCollectionBlocks =
     kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
 
-constexpr int kAdaptiveFilterLength = 12;
-constexpr int kResidualEchoPowerRenderWindowSize = 30;
-
 constexpr size_t kFftLengthBy2 = 64;
 constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
 constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
 constexpr size_t kFftLength = 2 * kFftLengthBy2;
 
+constexpr int kAdaptiveFilterLength = 12;
+constexpr int kResidualEchoPowerRenderWindowSize = 30;
+constexpr int kAdaptiveFilterTimeDomainLength =
+    kAdaptiveFilterLength * kFftLengthBy2;
+
 constexpr size_t kMaxNumBands = 3;
 constexpr size_t kSubFrameLength = 80;
 

diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc
index 3840ef9..aa389c8 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state.cc
+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc

@@ -78,11 +78,11 @@
 
 int AecState::instance_count_ = 0;
 
-AecState::AecState(float echo_decay)
+AecState::AecState(float reverb_decay)
     : data_dumper_(
           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
       echo_path_change_counter_(kEchoPathChangeCounterInitial),
-      echo_decay_factor_(echo_decay) {}
+      reverb_decay_(reverb_decay) {}
 
 AecState::~AecState() = default;
 
@@ -111,12 +111,18 @@
 
 void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
                           adaptive_filter_frequency_response,
+                      const std::array<float, kAdaptiveFilterTimeDomainLength>&
+                          adaptive_filter_impulse_response,
                       const rtc::Optional<size_t>& external_delay_samples,
                       const RenderBuffer& render_buffer,
                       const std::array<float, kFftLengthBy2Plus1>& E2_main,
                       const std::array<float, kFftLengthBy2Plus1>& Y2,
                       rtc::ArrayView<const float> x,
+                      const std::array<float, kBlockSize>& s,
                       bool echo_leakage_detected) {
+  // Update the echo audibility evaluator.
+  echo_audibility_.Update(x, s);
+
   // Store input parameters.
   echo_leakage_detected_ = echo_leakage_detected;
 
@@ -179,6 +185,126 @@
       !external_delay_ && !filter_delay_ &&
       (!render_received_ ||
        blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);
+
+  // Update the room reverb estimate.
+  UpdateReverb(adaptive_filter_impulse_response);
+}
+
+// Updates the estimate of the reverb decay by matching exponential decay
+// candidates against the squared adaptive filter impulse response. One
+// candidate is evaluated per call, and the best one is applied once all
+// candidates have been tested.
+void AecState::UpdateReverb(
+    const std::array<float, kAdaptiveFilterTimeDomainLength>&
+        impulse_response) {
+  // Do nothing unless there is a usable linear estimate with a known filter
+  // delay that does not exceed kAdaptiveFilterLength - 4.
+  if ((!(filter_delay_ && usable_linear_estimate_)) ||
+      (*filter_delay_ > kAdaptiveFilterLength - 4)) {
+    return;
+  }
+
+  // Form the data to match against by squaring the impulse response
+  // coefficients.
+  std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;
+  std::transform(impulse_response.begin(), impulse_response.end(),
+                 matching_data.begin(), [](float a) { return a * a; });
+
+  // Avoid matching against noise in the model by subtracting an estimate of the
+  // model noise power, taken as the largest squared coefficient in the tail.
+  constexpr size_t kTailLength = 64;
+  constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;
+  const float tail_power = *std::max_element(matching_data.begin() + tail_index,
+                                             matching_data.end());
+  std::for_each(matching_data.begin(), matching_data.begin() + tail_index,
+                [tail_power](float& a) { a = std::max(0.f, a - tail_power); });
+
+  // Identify the peak index of the impulse response. std::max_element returns
+  // an iterator, so the index is its offset from the start of the data and not
+  // the value it points to.
+  const auto peak_it = std::max_element(matching_data.begin(),
+                                        matching_data.begin() + tail_index);
+  const size_t peak_index =
+      static_cast<size_t>(peak_it - matching_data.begin());
+
+  if (peak_index + 128 < tail_index) {
+    const size_t start_index = peak_index + 64;
+    // Compute the matching residual error for the current candidate to match.
+    float residual_sqr_sum = 0.f;
+    float d_k = reverb_decay_to_test_;
+    size_t num_matched = 0;
+    for (size_t k = start_index; k < tail_index; ++k) {
+      // Stop once the noise-compensated data has reached zero. The index k + 1
+      // is valid since k < tail_index < matching_data.size().
+      if (matching_data[k + 1] == 0.f) {
+        break;
+      }
+
+      float residual = matching_data[k] - matching_data[peak_index] * d_k;
+      residual_sqr_sum += residual * residual;
+      d_k *= reverb_decay_to_test_;
+      ++num_matched;
+    }
+
+    // If needed, update the best candidate for the reverb decay. A candidate
+    // for which nothing was matched carries no information and is skipped, as
+    // its zero residual would otherwise always win.
+    if (num_matched > 0 &&
+        (reverb_decay_candidate_residual_ < 0.f ||
+         residual_sqr_sum < reverb_decay_candidate_residual_)) {
+      reverb_decay_candidate_residual_ = residual_sqr_sum;
+      reverb_decay_candidate_ = reverb_decay_to_test_;
+    }
+  }
+
+  // Compute the next reverb candidate to evaluate. The step size makes a full
+  // sweep over the candidates take 5 * kNumBlocksPerSecond calls.
+  reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond);
+
+  // If all reverb candidates have been evaluated, choose the best one as the
+  // reverb decay.
+  if (reverb_decay_to_test_ >= 0.9965f) {
+    // The residual is negative only when no candidate was accepted during the
+    // sweep, in which case the previous reverb decay is kept.
+    if (reverb_decay_candidate_residual_ >= 0.f) {
+      // Transform the decay to be in the unit of blocks.
+      reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2);
+
+      // Limit the estimated reverb_decay_ to the maximum one needed in practice
+      // to minimize the impact of incorrect estimates.
+      reverb_decay_ = std::min(0.8f, reverb_decay_);
+    }
+    reverb_decay_to_test_ = 0.9f;
+    reverb_decay_candidate_residual_ = -1.f;
+  }
+
+  // For noisy impulse responses, assume a fixed tail length.
+  if (tail_power > 0.0005f) {
+    reverb_decay_ = 0.7f;
+  }
+  data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_);
+  data_dumper_->DumpRaw("aec3_tail_power", tail_power);
+}
+
+void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
+                                      const std::array<float, kBlockSize>& s) {
+  // Largest absolute sample value within [first, last).
+  const auto peak_magnitude = [](const float* first,
+                                 const float* last) -> float {
+    const auto extremes = std::minmax_element(first, last);
+    return std::max(std::abs(*extremes.first), std::abs(*extremes.second));
+  };
+  const float render_peak = peak_magnitude(x.data(), x.data() + x.size());
+  const float echo_peak = peak_magnitude(s.data(), s.data() + s.size());
+
+  // Count the consecutive blocks for which the render signal is close to zero.
+  low_farend_counter_ = render_peak < 5.f ? low_farend_counter_ + 1 : 0;
+
+  // The echo is considered inaudible in two situations. The first is when the
+  // echo estimate is at the level of the quantization noise in the FFTs while
+  // the nearend level is strong enough to mask it, which ensures that the
+  // playout and AGC gains do not boost residual echo below the quantization
+  // noise level. The second is when the render signal has been very close to
+  // zero for a number of consecutive blocks.
+  const bool masked_echo = max_nearend_ > 500 && echo_peak < 30.f;
+  inaudible_echo_ = masked_echo || low_farend_counter_ > 20;
+}
+
+void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) {
+  // Peak magnitude of the supplied output block.
+  const float highest = *std::max_element(e.begin(), e.end());
+  const float lowest = *std::min_element(e.begin(), e.end());
+  const float peak = std::max(std::abs(highest), std::abs(lowest));
+
+  // A new maximum replaces the tracked level and restarts the hold counter.
+  if (peak > max_nearend_) {
+    max_nearend_ = peak;
+    max_nearend_counter_ = 0;
+    return;
+  }
+
+  // Otherwise let the tracked level decay once it has been held for more than
+  // 5 * kNumBlocksPerSecond calls.
+  ++max_nearend_counter_;
+  if (max_nearend_counter_ > 5 * kNumBlocksPerSecond) {
+    max_nearend_ *= 0.995f;
+  }
+}
 }
 
 }  // namespace webrtc

diff --git a/webrtc/modules/audio_processing/aec3/aec_state.h b/webrtc/modules/audio_processing/aec3/aec_state.h
index 1b00bf5..5192a92 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state.h
+++ b/webrtc/modules/audio_processing/aec3/aec_state.h

@@ -31,7 +31,7 @@
 // Handles the state and the conditions for the echo removal functionality.
 class AecState {
  public:
-  explicit AecState(float echo_decay);
+  explicit AecState(float reverb_decay);
   ~AecState();
 
   // Returns whether the linear filter estimate is usable.
@@ -78,23 +78,50 @@
   void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
 
   // Returns the decay factor for the echo reverberation.
-  // TODO(peah): Make this adaptive.
-  float ReverbDecayFactor() const { return echo_decay_factor_; }
+  float ReverbDecay() const { return reverb_decay_; }
 
   // Returns whether the echo suppression gain should be forced to zero.
   bool ForcedZeroGain() const { return force_zero_gain_; }
 
+  // Returns whether the echo in the capture signal is deemed inaudible.
+  bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); }
+
+  // Updates the aec state with the AEC output signal.
+  void UpdateWithOutput(rtc::ArrayView<const float> e) {
+    echo_audibility_.UpdateWithOutput(e);
+  }
+
   // Updates the aec state.
   void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
                   adaptive_filter_frequency_response,
+              const std::array<float, kAdaptiveFilterTimeDomainLength>&
+                  adaptive_filter_impulse_response,
               const rtc::Optional<size_t>& external_delay_samples,
               const RenderBuffer& render_buffer,
               const std::array<float, kFftLengthBy2Plus1>& E2_main,
               const std::array<float, kFftLengthBy2Plus1>& Y2,
               rtc::ArrayView<const float> x,
+              const std::array<float, kBlockSize>& s_main,
               bool echo_leakage_detected);
 
  private:
+  class EchoAudibility {  // Tracks whether the echo is deemed inaudible.
+   public:
+    void Update(rtc::ArrayView<const float> x,
+                const std::array<float, kBlockSize>& s);
+    void UpdateWithOutput(rtc::ArrayView<const float> e);
+    bool InaudibleEcho() const { return inaudible_echo_; }
+
+   private:
+    float max_nearend_ = 0.f;  // Decaying peak of |e| from UpdateWithOutput().
+    size_t max_nearend_counter_ = 0;  // Calls since max_nearend_ last rose.
+    size_t low_farend_counter_ = 0;  // Consecutive Update()s with max |x| < 5.
+    bool inaudible_echo_ = false;  // Latest verdict computed by Update().
+  };
+
+  void UpdateReverb(const std::array<float, kAdaptiveFilterTimeDomainLength>&
+                        impulse_response);
+
   static int instance_count_;
   std::unique_ptr<ApmDataDumper> data_dumper_;
   ErlEstimator erl_estimator_;
@@ -113,7 +140,12 @@
   rtc::Optional<size_t> filter_delay_;
   rtc::Optional<size_t> external_delay_;
   size_t blocks_since_last_saturation_ = 1000;
-  const float echo_decay_factor_;
+  float reverb_decay_;
+  float reverb_decay_to_test_ = 0.9f;
+  float reverb_decay_candidate_ = 0.f;
+  float reverb_decay_candidate_residual_ = -1.f;
+  EchoAudibility echo_audibility_;
+
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AecState);
 };
 

diff --git a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
index 682126e..7062d24 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc

@@ -25,6 +25,8 @@
   std::array<float, kFftLengthBy2Plus1> Y2 = {};
   std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
   EchoPathVariability echo_path_variability(false, false);
+  std::array<float, kBlockSize> s;
+  s.fill(100.f);
 
   std::vector<std::array<float, kFftLengthBy2Plus1>>
       converged_filter_frequency_response(10);
@@ -36,47 +38,57 @@
   converged_filter_frequency_response[2].fill(100.f);
   converged_filter_frequency_response[2][0] = 1.f;
 
+  std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+  impulse_response.fill(0.f);
+
   // Verify that linear AEC usability is false when the filter is diverged and
   // there is no external delay reported.
-  state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(),
-               render_buffer, E2_main, Y2, x[0], false);
+  state.Update(diverged_filter_frequency_response, impulse_response,
+               rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s,
+               false);
   EXPECT_FALSE(state.UsableLinearEstimate());
 
   // Verify that linear AEC usability is true when the filter is converged
   std::fill(x[0].begin(), x[0].end(), 101.f);
   for (int k = 0; k < 3000; ++k) {
-    state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-                 render_buffer, E2_main, Y2, x[0], false);
+    state.Update(converged_filter_frequency_response, impulse_response,
+                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+                 false);
   }
   EXPECT_TRUE(state.UsableLinearEstimate());
 
   // Verify that linear AEC usability becomes false after an echo path change is
   // reported
   state.HandleEchoPathChange(EchoPathVariability(true, false));
-  state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-               render_buffer, E2_main, Y2, x[0], false);
+  state.Update(converged_filter_frequency_response, impulse_response,
+               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+               false);
   EXPECT_FALSE(state.UsableLinearEstimate());
 
   // Verify that the active render detection works as intended.
   std::fill(x[0].begin(), x[0].end(), 101.f);
   state.HandleEchoPathChange(EchoPathVariability(true, true));
-  state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-               render_buffer, E2_main, Y2, x[0], false);
+  state.Update(converged_filter_frequency_response, impulse_response,
+               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+               false);
   EXPECT_FALSE(state.ActiveRender());
 
   for (int k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-                 render_buffer, E2_main, Y2, x[0], false);
+    state.Update(converged_filter_frequency_response, impulse_response,
+                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+                 false);
   }
   EXPECT_TRUE(state.ActiveRender());
 
   // Verify that echo leakage is properly reported.
-  state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-               render_buffer, E2_main, Y2, x[0], false);
+  state.Update(converged_filter_frequency_response, impulse_response,
+               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+               false);
   EXPECT_FALSE(state.EchoLeakageDetected());
 
-  state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-               render_buffer, E2_main, Y2, x[0], true);
+  state.Update(converged_filter_frequency_response, impulse_response,
+               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+               true);
   EXPECT_TRUE(state.EchoLeakageDetected());
 
   // Verify that the ERL is properly estimated
@@ -91,8 +103,9 @@
 
   Y2.fill(10.f * 10000.f * 10000.f);
   for (size_t k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-                 render_buffer, E2_main, Y2, x[0], false);
+    state.Update(converged_filter_frequency_response, impulse_response,
+                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+                 false);
   }
 
   ASSERT_TRUE(state.UsableLinearEstimate());
@@ -107,8 +120,9 @@
   E2_main.fill(1.f * 10000.f * 10000.f);
   Y2.fill(10.f * E2_main[0]);
   for (size_t k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-                 render_buffer, E2_main, Y2, x[0], false);
+    state.Update(converged_filter_frequency_response, impulse_response,
+                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+                 false);
   }
   ASSERT_TRUE(state.UsableLinearEstimate());
   {
@@ -127,8 +141,9 @@
   E2_main.fill(1.f * 10000.f * 10000.f);
   Y2.fill(5.f * E2_main[0]);
   for (size_t k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
-                 render_buffer, E2_main, Y2, x[0], false);
+    state.Update(converged_filter_frequency_response, impulse_response,
+                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+                 false);
   }
 
   ASSERT_TRUE(state.UsableLinearEstimate());
@@ -155,6 +170,8 @@
   std::array<float, kFftLengthBy2Plus1> Y2;
   std::array<float, kBlockSize> x;
   EchoPathVariability echo_path_variability(false, false);
+  std::array<float, kBlockSize> s;
+  s.fill(100.f);
   x.fill(0.f);
 
   std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30);
@@ -162,10 +179,13 @@
     v.fill(0.01f);
   }
 
+  std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+  impulse_response.fill(0.f);
+
   // Verify that a non-significant filter delay is identified correctly.
   state.HandleEchoPathChange(echo_path_variability);
-  state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
-               E2_main, Y2, x, false);
+  state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
+               render_buffer, E2_main, Y2, x, s, false);
   EXPECT_FALSE(state.FilterDelay());
 }
 
@@ -179,11 +199,16 @@
   std::array<float, kFftLengthBy2Plus1> Y2;
   std::array<float, kBlockSize> x;
   EchoPathVariability echo_path_variability(false, false);
+  std::array<float, kBlockSize> s;
+  s.fill(100.f);
   x.fill(0.f);
 
   std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(
       kFilterLength);
 
+  std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+  impulse_response.fill(0.f);
+
   // Verify that the filter delay for a converged filter is properly identified.
   for (int k = 0; k < kFilterLength; ++k) {
     for (auto& v : frequency_response) {
@@ -192,8 +217,8 @@
     frequency_response[k].fill(100.f);
     frequency_response[k][0] = 0.f;
     state.HandleEchoPathChange(echo_path_variability);
-    state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
-                 E2_main, Y2, x, false);
+    state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
+                 render_buffer, E2_main, Y2, x, s, false);
     EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
     if (k != (kFilterLength - 1)) {
       EXPECT_EQ(k, state.FilterDelay());
@@ -208,6 +233,8 @@
   std::array<float, kFftLengthBy2Plus1> E2_shadow;
   std::array<float, kFftLengthBy2Plus1> Y2;
   std::array<float, kBlockSize> x;
+  std::array<float, kBlockSize> s;
+  s.fill(100.f);
   E2_main.fill(0.f);
   E2_shadow.fill(0.f);
   Y2.fill(0.f);
@@ -219,10 +246,14 @@
     v.fill(0.01f);
   }
 
+  std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+  impulse_response.fill(0.f);
+
   for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
     state.HandleEchoPathChange(EchoPathVariability(false, false));
-    state.Update(frequency_response, rtc::Optional<size_t>(k * kBlockSize + 5),
-                 render_buffer, E2_main, Y2, x, false);
+    state.Update(frequency_response, impulse_response,
+                 rtc::Optional<size_t>(k * kBlockSize + 5), render_buffer,
+                 E2_main, Y2, x, s, false);
     EXPECT_TRUE(state.ExternalDelay());
     EXPECT_EQ(k, state.ExternalDelay());
   }
@@ -230,8 +261,8 @@
   // Verify that the externally reported delay is properly unset when it is no
   // longer present.
   state.HandleEchoPathChange(EchoPathVariability(false, false));
-  state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
-               E2_main, Y2, x, false);
+  state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
+               render_buffer, E2_main, Y2, x, s, false);
   EXPECT_FALSE(state.ExternalDelay());
 }
 

diff --git a/webrtc/modules/audio_processing/aec3/echo_remover.cc b/webrtc/modules/audio_processing/aec3/echo_remover.cc
index 842c385..64ffbad 100644
--- a/webrtc/modules/audio_processing/aec3/echo_remover.cc
+++ b/webrtc/modules/audio_processing/aec3/echo_remover.cc

@@ -131,6 +131,8 @@
                         LowestBandRate(sample_rate_hz_), 1);
   data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0],
                         LowestBandRate(sample_rate_hz_), 1);
+  data_dumper_->DumpRaw("aec3_echo_remover_capture_input", y0);
+  data_dumper_->DumpRaw("aec3_echo_remover_render_input", x0);
 
   aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
 
@@ -167,13 +169,15 @@
 
   // Update the AEC state information.
   aec_state_.Update(subtractor_.FilterFrequencyResponse(),
+                    subtractor_.FilterImpulseResponse(),
                     echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
-                    echo_leakage_detected_);
+                    subtractor_output.s_main, echo_leakage_detected_);
 
   // Choose the linear output.
   output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
   data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
                         LowestBandRate(sample_rate_hz_), 1);
+  data_dumper_->DumpRaw("aec3_output_linear", y0);
   const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;
 
   // Estimate the residual echo power.
@@ -194,7 +198,14 @@
   // Update the metrics.
   metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);
 
+  // Update the aec state with the aec output characteristics.
+  aec_state_.UpdateWithOutput(y0);
+
   // Debug outputs for the purpose of development and analysis.
+  data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
+                        &subtractor_output.s_main[0],
+                        LowestBandRate(sample_rate_hz_), 1);
+  data_dumper_->DumpRaw("aec3_output", y0);
   data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum());
   data_dumper_->DumpRaw("aec3_suppressor_gain", G);
   data_dumper_->DumpWav("aec3_output",

diff --git a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index fc33e12..6e8a80b 100644
--- a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc

@@ -55,7 +55,8 @@
   std::vector<float> y(kBlockSize, 0.f);
   AecState aec_state(0.f);
   RenderSignalAnalyzer render_signal_analyzer;
-  std::array<float, kFftLength> s;
+  std::array<float, kFftLength> s_scratch;
+  std::array<float, kBlockSize> s;
   FftData S;
   FftData G;
   SubtractorOutput output;
@@ -96,18 +97,21 @@
 
     // Apply the main filter.
     main_filter.Filter(render_buffer, &S);
-    fft.Ifft(S, &s);
-    std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
+    fft.Ifft(S, &s_scratch);
+    std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
                    e_main.begin(),
                    [&](float a, float b) { return a - b * kScale; });
     std::for_each(e_main.begin(), e_main.end(),
                   [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
     fft.ZeroPaddedFft(e_main, &E_main);
+    for (size_t k = 0; k < kBlockSize; ++k) {
+      s[k] = kScale * s_scratch[k + kFftLengthBy2];
+    }
 
     // Apply the shadow filter.
     shadow_filter.Filter(render_buffer, &S);
-    fft.Ifft(S, &s);
-    std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
+    fft.Ifft(S, &s_scratch);
+    std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
                    e_shadow.begin(),
                    [&](float a, float b) { return a - b * kScale; });
     std::for_each(e_shadow.begin(), e_shadow.end(),
@@ -131,8 +135,9 @@
     // Update the delay.
     aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
     aec_state.Update(main_filter.FilterFrequencyResponse(),
+                     main_filter.FilterImpulseResponse(),
                      rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
-                     false);
+                     s, false);
   }
 
   std::copy(e_main.begin(), e_main.end(), e_last_block->begin());

diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
index 6ec00e4..d17afa6 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc

@@ -111,7 +111,7 @@
     const int filter_delay = *aec_state.FilterDelay();
     LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
     AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
-                  aec_state.ReverbDecayFactor(), R2);
+                  aec_state.ReverbDecay(), R2);
   } else {
     // Estimate the echo generating signal power.
     std::array<float, kFftLengthBy2Plus1> X2;
@@ -142,7 +142,12 @@
     AddEchoReverb(*R2, aec_state.SaturatedEcho(),
                   std::min(static_cast<size_t>(kAdaptiveFilterLength),
                            delay.value_or(kAdaptiveFilterLength)),
-                  aec_state.ReverbDecayFactor(), R2);
+                  aec_state.ReverbDecay(), R2);
+  }
+
+  // If the echo is deemed inaudible, set the residual echo to zero.
+  if (aec_state.InaudibleEcho()) {
+    R2->fill(0.f);
   }
 
   // If the echo is saturated, estimate the echo power as the maximum echo power

diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index b28cf51..b448c4d 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc

@@ -52,6 +52,7 @@
   Random random_generator(42U);
   FftData X;
   std::array<float, kBlockSize> x_old;
+  std::array<float, kBlockSize> s;
   Aec3Fft fft;
 
   for (auto& H2_k : H2) {
@@ -60,6 +61,11 @@
   H2[2].fill(10.f);
   H2[2][0] = 0.1f;
 
+  std::array<float, kAdaptiveFilterTimeDomainLength> h;
+  h.fill(0.f);
+
+  s.fill(100.f);
+
   constexpr float kLevel = 10.f;
   E2_shadow.fill(kLevel);
   E2_main.fill(kLevel);
@@ -74,8 +80,8 @@
     render_buffer.Insert(x);
 
     aec_state.HandleEchoPathChange(echo_path_variability);
-    aec_state.Update(H2, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2,
-                     x[0], false);
+    aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main,
+                     Y2, x[0], s, false);
 
     estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2);
   }

diff --git a/webrtc/modules/audio_processing/aec3/subtractor.cc b/webrtc/modules/audio_processing/aec3/subtractor.cc
index a7bf84d..20ba510 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor.cc
+++ b/webrtc/modules/audio_processing/aec3/subtractor.cc

@@ -25,15 +25,22 @@
                      const FftData& S,
                      rtc::ArrayView<const float> y,
                      std::array<float, kBlockSize>* e,
-                     FftData* E) {
-  std::array<float, kFftLength> s;
-  fft.Ifft(S, &s);
+                     FftData* E,
+                     std::array<float, kBlockSize>* s) {
+  std::array<float, kFftLength> s_scratch;
+  fft.Ifft(S, &s_scratch);
   constexpr float kScale = 1.0f / kFftLengthBy2;
-  std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e->begin(),
-                 [&](float a, float b) { return a - b * kScale; });
+  std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+                 e->begin(), [&](float a, float b) { return a - b * kScale; });
   std::for_each(e->begin(), e->end(),
                 [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
   fft.ZeroPaddedFft(*e, E);
+
+  if (s) {
+    for (size_t k = 0; k < s->size(); ++k) {
+      (*s)[k] = kScale * s_scratch[k + kFftLengthBy2];
+    }
+  }
 }
 }  // namespace
 
@@ -47,7 +54,7 @@
   RTC_DCHECK(data_dumper_);
 }
 
-Subtractor::~Subtractor() {}
+Subtractor::~Subtractor() = default;
 
 void Subtractor::HandleEchoPathChange(
     const EchoPathVariability& echo_path_variability) {
@@ -76,11 +83,11 @@
 
   // Form the output of the main filter.
   main_filter_.Filter(render_buffer, &S);
-  PredictionError(fft_, S, y, &e_main, &E_main);
+  PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main);
 
   // Form the output of the shadow filter.
   shadow_filter_.Filter(render_buffer, &S);
-  PredictionError(fft_, S, y, &e_shadow, &E_shadow);
+  PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
 
   // Compute spectra for future use.
   E_main.Spectrum(optimization_, &output->E2_main);

diff --git a/webrtc/modules/audio_processing/aec3/subtractor.h b/webrtc/modules/audio_processing/aec3/subtractor.h
index c194b2c..777e4ff 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor.h
+++ b/webrtc/modules/audio_processing/aec3/subtractor.h

@@ -45,12 +45,18 @@
 
   void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
 
-  // Returns the block-wise frequency response of the main adaptive filter.
+  // Returns the block-wise frequency response for the main adaptive filter.
   const std::vector<std::array<float, kFftLengthBy2Plus1>>&
   FilterFrequencyResponse() const {
     return main_filter_.FilterFrequencyResponse();
   }
 
+  // Returns the estimate of the impulse response for the main adaptive filter.
+  const std::array<float, kAdaptiveFilterTimeDomainLength>&
+  FilterImpulseResponse() const {
+    return main_filter_.FilterImpulseResponse();
+  }
+
  private:
   const Aec3Fft fft_;
   ApmDataDumper* data_dumper_;

diff --git a/webrtc/modules/audio_processing/aec3/subtractor_output.h b/webrtc/modules/audio_processing/aec3/subtractor_output.h
index e2d23b5..8755047 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor_output.h
+++ b/webrtc/modules/audio_processing/aec3/subtractor_output.h

@@ -20,6 +20,7 @@
 
 // Stores the values being returned from the echo subtractor.
 struct SubtractorOutput {
+  std::array<float, kBlockSize> s_main;
   std::array<float, kBlockSize> e_main;
   std::array<float, kBlockSize> e_shadow;
   FftData E_main;
@@ -27,6 +28,7 @@
   std::array<float, kFftLengthBy2Plus1> E2_shadow;
 
   void Reset() {
+    s_main.fill(0.f);
     e_main.fill(0.f);
     e_shadow.fill(0.f);
     E_main.re.fill(0.f);

diff --git a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
index a5e2a4e..32fc054 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc

@@ -68,8 +68,9 @@
 
     aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
     aec_state.Update(subtractor.FilterFrequencyResponse(),
+                     subtractor.FilterImpulseResponse(),
                      rtc::Optional<size_t>(delay_samples / kBlockSize),
-                     render_buffer, E2_main, Y2, x[0], false);
+                     render_buffer, E2_main, Y2, x[0], output.s_main, false);
   }
 
   const float output_power = std::inner_product(
commit	2910357621dee4368bd3eaa0040cec82ac230dad	[log] [tgz]
author	peah <peah@webrtc.org>	Tue Jul 11 09:54:02 2017
committer	Commit Bot <commit-bot@chromium.org>	Tue Jul 11 09:54:02 2017
tree	512ef6314bea9fcb03d5b7e8eb0fadbe9f274dad
parent	863f03ba38f397c078931d2bd27d40a4d60efc02 [diff]