AEC3: Delay estimator uses bandpass filtered signal with downsampling factor 8

Letting the delay estimator operate at a sampling frequency of 2 kHz
with audio between 0 and 1 kHz makes it sensitive to noisy environments.
This CL bandpass filters the 16 kHz signal before downsampling to 2 kHz,
so that the downsampled 2 kHz signal instead carries the audio content of
the 1-2 kHz band. It also sets downsampling factor 8 as the default, which
significantly reduces computational complexity.

Bug: webrtc:9288,chromium:846615
Change-Id: Iaf67898a1a14326cd61bb7f81c14d3c12a697c8d
Reviewed-on: https://webrtc-review.googlesource.com/78703
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23395}
diff --git a/modules/audio_processing/aec3/decimator.cc b/modules/audio_processing/aec3/decimator.cc
index 75aa331..6135db5 100644
--- a/modules/audio_processing/aec3/decimator.cc
+++ b/modules/audio_processing/aec3/decimator.cc
@@ -28,32 +28,39 @@
     {-1.5879f, 0.6594f}};
 constexpr int kNumFilters4 = 3;
 
-// b, a = signal.butter(2, 800/8000.0, 'lowpass', analog=False) which are the
-// same as b, a = signal.butter(2, 400/4000.0, 'lowpass', analog=False).
-const CascadedBiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients8 = {
-    {0.02008337f, 0.04016673f, 0.02008337f},
-    {-1.56101808f, 0.64135154f}};
-constexpr int kNumFilters8 = 4;
+// b, a = signal.cheby1(1, 6, [1000/8000, 2000/8000], btype='bandpass',
+// analog=False)
+const CascadedBiQuadFilter::BiQuadCoefficients kBandPassFilterCoefficients8 = {
+    {0.10330478f, 0.f, -0.10330478f},
+    {-1.520363f, 0.79339043f}};
+constexpr int kNumFilters8 = 5;
 
 // b, a = signal.butter(2, 1000/8000.0, 'highpass', analog=False)
-const CascadedBiQuadFilter::BiQuadCoefficients kHighPassFilterCoefficients4 = {
+const CascadedBiQuadFilter::BiQuadCoefficients kHighPassFilterCoefficients = {
     {0.75707638f, -1.51415275f, 0.75707638f},
     {-1.45424359f, 0.57406192f}};
+constexpr int kNumFiltersHP2 = 1;
 constexpr int kNumFiltersHP4 = 1;
+constexpr int kNumFiltersHP8 = 0;
 
 }  // namespace
 
 Decimator::Decimator(size_t down_sampling_factor)
     : down_sampling_factor_(down_sampling_factor),
-      low_pass_filter_(
+      anti_aliasing_filter_(
           down_sampling_factor_ == 4
               ? kLowPassFilterCoefficients4
-              : (down_sampling_factor_ == 8 ? kLowPassFilterCoefficients8
+              : (down_sampling_factor_ == 8 ? kBandPassFilterCoefficients8
                                             : kLowPassFilterCoefficients2),
           down_sampling_factor_ == 4
               ? kNumFilters4
               : (down_sampling_factor_ == 8 ? kNumFilters8 : kNumFilters2)),
-      high_pass_filter_(kHighPassFilterCoefficients4, kNumFiltersHP4) {
+      noise_reduction_filter_(
+          kHighPassFilterCoefficients,
+          down_sampling_factor_ == 4
+              ? kNumFiltersHP4
+              : (down_sampling_factor_ == 8 ? kNumFiltersHP8
+                                            : kNumFiltersHP2)) {
   RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 ||
              down_sampling_factor_ == 8);
 }
@@ -65,11 +72,10 @@
   std::array<float, kBlockSize> x;
 
   // Limit the frequency content of the signal to avoid aliasing.
-  low_pass_filter_.Process(in, x);
+  anti_aliasing_filter_.Process(in, x);
 
-  // High-pass filter to reduce the impact of near-end noise.
-  if (down_sampling_factor_ == 4)
-    high_pass_filter_.Process(x, x);
+  // Reduce the impact of near-end noise.
+  noise_reduction_filter_.Process(x);
 
   // Downsample the signal.
   for (size_t j = 0, k = 0; j < out.size(); ++j, k += down_sampling_factor_) {
diff --git a/modules/audio_processing/aec3/decimator.h b/modules/audio_processing/aec3/decimator.h
index e6922e0..2bb60a4 100644
--- a/modules/audio_processing/aec3/decimator.h
+++ b/modules/audio_processing/aec3/decimator.h
@@ -30,8 +30,8 @@
 
  private:
   const size_t down_sampling_factor_;
-  CascadedBiQuadFilter low_pass_filter_;
-  CascadedBiQuadFilter high_pass_filter_;
+  CascadedBiQuadFilter anti_aliasing_filter_;
+  CascadedBiQuadFilter noise_reduction_filter_;
 
   RTC_DISALLOW_COPY_AND_ASSIGN(Decimator);
 };
diff --git a/modules/audio_processing/aec3/echo_path_delay_estimator.cc b/modules/audio_processing/aec3/echo_path_delay_estimator.cc
index 0026522..14cf8da 100644
--- a/modules/audio_processing/aec3/echo_path_delay_estimator.cc
+++ b/modules/audio_processing/aec3/echo_path_delay_estimator.cc
@@ -16,14 +16,24 @@
 #include "modules/audio_processing/aec3/aec3_common.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/checks.h"
+#include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
+namespace {
+size_t GetDownSamplingFactor(const EchoCanceller3Config& config) {
+  // Do not use down sampling factor 8 if kill switch is triggered.
+  return (config.delay.down_sampling_factor == 8 &&
+          field_trial::IsEnabled("WebRTC-Aec3DownSamplingFactor8KillSwitch"))
+             ? 4
+             : config.delay.down_sampling_factor;
+}
+}  // namespace
 
 EchoPathDelayEstimator::EchoPathDelayEstimator(
     ApmDataDumper* data_dumper,
     const EchoCanceller3Config& config)
     : data_dumper_(data_dumper),
-      down_sampling_factor_(config.delay.down_sampling_factor),
+      down_sampling_factor_(GetDownSamplingFactor(config)),
       sub_block_size_(down_sampling_factor_ != 0
                           ? kBlockSize / down_sampling_factor_
                           : kBlockSize),
diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc
index 28909a2..b01593b 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -35,6 +35,14 @@
       "WebRTC-Aec3ZeroExternalDelayHeadroomKillSwitch");
 }
 
+size_t GetDownSamplingFactor(const EchoCanceller3Config& config) {
+  // Do not use down sampling factor 8 if kill switch is triggered.
+  return (config.delay.down_sampling_factor == 8 &&
+          field_trial::IsEnabled("WebRTC-Aec3DownSamplingFactor8KillSwitch"))
+             ? 4
+             : config.delay.down_sampling_factor;
+}
+
 class RenderDelayBufferImpl final : public RenderDelayBuffer {
  public:
   RenderDelayBufferImpl(const EchoCanceller3Config& config, size_t num_bands);
@@ -63,6 +71,7 @@
   std::unique_ptr<ApmDataDumper> data_dumper_;
   const Aec3Optimization optimization_;
   const EchoCanceller3Config config_;
+  size_t down_sampling_factor_;
   const bool use_zero_external_delay_headroom_;
   const int sub_block_size_;
   MatrixBuffer blocks_;
@@ -168,12 +177,12 @@
           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
       optimization_(DetectOptimization()),
       config_(config),
+      down_sampling_factor_(GetDownSamplingFactor(config)),
       use_zero_external_delay_headroom_(EnableZeroExternalDelayHeadroom()),
-      sub_block_size_(
-          static_cast<int>(config.delay.down_sampling_factor > 0
-                               ? kBlockSize / config.delay.down_sampling_factor
-                               : kBlockSize)),
-      blocks_(GetRenderDelayBufferSize(config.delay.down_sampling_factor,
+      sub_block_size_(static_cast<int>(down_sampling_factor_ > 0
+                                           ? kBlockSize / down_sampling_factor_
+                                           : kBlockSize)),
+      blocks_(GetRenderDelayBufferSize(down_sampling_factor_,
                                        config.delay.num_filters,
                                        config.filter.main.length_blocks),
               num_bands,
@@ -182,9 +191,9 @@
       ffts_(blocks_.buffer.size()),
       delay_(config_.delay.default_delay),
       echo_remover_buffer_(&blocks_, &spectra_, &ffts_),
-      low_rate_(GetDownSampledBufferSize(config.delay.down_sampling_factor,
+      low_rate_(GetDownSampledBufferSize(down_sampling_factor_,
                                          config.delay.num_filters)),
-      render_decimator_(config.delay.down_sampling_factor),
+      render_decimator_(down_sampling_factor_),
       zero_block_(num_bands, std::vector<float>(kBlockSize, 0.f)),
       fft_(),
       render_ds_(sub_block_size_, 0.f),
@@ -433,7 +442,7 @@
                         block[0].data(), 16000, 1);
   render_decimator_.Decimate(block[0], ds);
   data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
-                        16000 / config_.delay.down_sampling_factor, 1);
+                        16000 / down_sampling_factor_, 1);
   std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
   fft_.PaddedFft(block[0], b.buffer[previous_write][0], &f.buffer[f.write]);
   f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write]);