Disable high-pass filtering of the AEC reference

Currently the echo canceller reference signal is high-pass filtered to
avoid the need to model the capture-side high-pass filter as part of
the echo path.

This can lead to the lowest frequency bins of the linear filter
diverging as there is little low-frequency content available for
training. Over time the filter can output an increasing amount of
low-frequency power, which in turn affects the filter's ability to
adapt properly.

Disabling the high-pass filtering of the echo canceller reference solves
this issue, resulting in improved filter convergence.

Bug: webrtc:12265
Change-Id: Ic526a4b1b73e1808cfcd96a8cdee801b96a27671
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/208288
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33322}
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 55281af..2ccc9ac 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -90,6 +90,7 @@
     bool conservative_initial_phase = false;
     bool enable_coarse_filter_output_usage = true;
     bool use_linear_filter = true;
+    bool high_pass_filter_echo_reference = false;
     bool export_linear_aec_output = false;
   } filter;
 
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index 9d10da9..9e15e3a 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -230,6 +230,8 @@
     ReadParam(section, "enable_coarse_filter_output_usage",
               &cfg.filter.enable_coarse_filter_output_usage);
     ReadParam(section, "use_linear_filter", &cfg.filter.use_linear_filter);
+    ReadParam(section, "high_pass_filter_echo_reference",
+              &cfg.filter.high_pass_filter_echo_reference);
     ReadParam(section, "export_linear_aec_output",
               &cfg.filter.export_linear_aec_output);
   }
@@ -513,6 +515,9 @@
       << ",";
   ost << "\"use_linear_filter\": "
       << (config.filter.use_linear_filter ? "true" : "false") << ",";
+  ost << "\"high_pass_filter_echo_reference\": "
+      << (config.filter.high_pass_filter_echo_reference ? "true" : "false")
+      << ",";
   ost << "\"export_linear_aec_output\": "
       << (config.filter.export_linear_aec_output ? "true" : "false");
 
diff --git a/api/audio/test/echo_canceller3_config_json_unittest.cc b/api/audio/test/echo_canceller3_config_json_unittest.cc
index 4a952fe..d6edd07 100644
--- a/api/audio/test/echo_canceller3_config_json_unittest.cc
+++ b/api/audio/test/echo_canceller3_config_json_unittest.cc
@@ -21,6 +21,8 @@
   cfg.delay.log_warning_on_delay_changes = true;
   cfg.filter.refined.error_floor = 2.f;
   cfg.filter.coarse_initial.length_blocks = 3u;
+  cfg.filter.high_pass_filter_echo_reference =
+      !cfg.filter.high_pass_filter_echo_reference;
   cfg.comfort_noise.noise_floor_dbfs = 100.f;
   cfg.echo_model.model_reverb_in_nonlinear_mode = false;
   cfg.suppressor.normal_tuning.mask_hf.enr_suppress = .5f;
@@ -47,6 +49,8 @@
             cfg_transformed.filter.coarse_initial.length_blocks);
   EXPECT_EQ(cfg.filter.refined.error_floor,
             cfg_transformed.filter.refined.error_floor);
+  EXPECT_EQ(cfg.filter.high_pass_filter_echo_reference,
+            cfg_transformed.filter.high_pass_filter_echo_reference);
   EXPECT_EQ(cfg.comfort_noise.noise_floor_dbfs,
             cfg_transformed.comfort_noise.noise_floor_dbfs);
   EXPECT_EQ(cfg.echo_model.model_reverb_in_nonlinear_mode,
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index 98da232..0f8e35d 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -251,6 +251,10 @@
     adjusted_cfg.filter.initial_state_seconds = 2.0f;
   }
 
+  if (field_trial::IsEnabled("WebRTC-Aec3HighPassFilterEchoReference")) {
+    adjusted_cfg.filter.high_pass_filter_echo_reference = true;
+  }
+
   if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
     adjusted_cfg.ep_strength.echo_can_saturate = false;
   }
@@ -574,6 +578,7 @@
 class EchoCanceller3::RenderWriter {
  public:
   RenderWriter(ApmDataDumper* data_dumper,
+               const EchoCanceller3Config& config,
                SwapQueue<std::vector<std::vector<std::vector<float>>>,
                          Aec3RenderQueueItemVerifier>* render_transfer_queue,
                size_t num_bands,
@@ -590,7 +595,7 @@
   ApmDataDumper* data_dumper_;
   const size_t num_bands_;
   const size_t num_channels_;
-  HighPassFilter high_pass_filter_;
+  std::unique_ptr<HighPassFilter> high_pass_filter_;
   std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
   SwapQueue<std::vector<std::vector<std::vector<float>>>,
             Aec3RenderQueueItemVerifier>* render_transfer_queue_;
@@ -598,6 +603,7 @@
 
 EchoCanceller3::RenderWriter::RenderWriter(
     ApmDataDumper* data_dumper,
+    const EchoCanceller3Config& config,
     SwapQueue<std::vector<std::vector<std::vector<float>>>,
               Aec3RenderQueueItemVerifier>* render_transfer_queue,
     size_t num_bands,
@@ -605,7 +611,6 @@
     : data_dumper_(data_dumper),
       num_bands_(num_bands),
       num_channels_(num_channels),
-      high_pass_filter_(16000, num_channels),
       render_queue_input_frame_(
           num_bands_,
           std::vector<std::vector<float>>(
@@ -613,6 +618,9 @@
               std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
       render_transfer_queue_(render_transfer_queue) {
   RTC_DCHECK(data_dumper);
+  if (config.filter.high_pass_filter_echo_reference) {
+    high_pass_filter_ = std::make_unique<HighPassFilter>(16000, num_channels);
+  }
 }
 
 EchoCanceller3::RenderWriter::~RenderWriter() = default;
@@ -631,7 +639,9 @@
 
   CopyBufferIntoFrame(input, num_bands_, num_channels_,
                       &render_queue_input_frame_);
-  high_pass_filter_.Process(&render_queue_input_frame_[0]);
+  if (high_pass_filter_) {
+    high_pass_filter_->Process(&render_queue_input_frame_[0]);
+  }
 
   static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
 }
@@ -704,7 +714,7 @@
         config_.delay.fixed_capture_delay_samples));
   }
 
-  render_writer_.reset(new RenderWriter(data_dumper_.get(),
+  render_writer_.reset(new RenderWriter(data_dumper_.get(), config_,
                                         &render_transfer_queue_, num_bands_,
                                         num_render_channels_));
 
diff --git a/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
index a02cfa3..acf8473 100644
--- a/modules/audio_processing/aec3/echo_canceller3_unittest.cc
+++ b/modules/audio_processing/aec3/echo_canceller3_unittest.cc
@@ -252,8 +252,6 @@
         capture_output.push_back(capture_buffer_.split_bands(0)[0][k]);
       }
     }
-    HighPassFilter hp_filter(16000, 1);
-    hp_filter.Process(&render_input);
 
     EXPECT_TRUE(
         VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
@@ -545,8 +543,6 @@
         capture_output.push_back(capture_buffer_.split_bands(0)[0][k]);
       }
     }
-    HighPassFilter hp_filter(16000, 1);
-    hp_filter.Process(&render_input);
 
     EXPECT_TRUE(
         VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index e3aedf6..4c879c7 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -2216,7 +2216,7 @@
                       std::make_tuple(32000, 44100, 16000, 44100, 19, 15),
                       std::make_tuple(32000, 32000, 48000, 32000, 40, 35),
                       std::make_tuple(32000, 32000, 32000, 32000, 0, 0),
-                      std::make_tuple(32000, 32000, 16000, 32000, 40, 20),
+                      std::make_tuple(32000, 32000, 16000, 32000, 39, 20),
                       std::make_tuple(32000, 16000, 48000, 16000, 25, 20),
                       std::make_tuple(32000, 16000, 32000, 16000, 25, 20),
                       std::make_tuple(32000, 16000, 16000, 16000, 25, 0),
@@ -2231,7 +2231,7 @@
                       std::make_tuple(16000, 32000, 32000, 32000, 25, 0),
                       std::make_tuple(16000, 32000, 16000, 32000, 25, 20),
                       std::make_tuple(16000, 16000, 48000, 16000, 39, 20),
-                      std::make_tuple(16000, 16000, 32000, 16000, 40, 20),
+                      std::make_tuple(16000, 16000, 32000, 16000, 39, 20),
                       std::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
 
 #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
diff --git a/resources/audio_processing/output_data_float.pb.sha1 b/resources/audio_processing/output_data_float.pb.sha1
index a19c6c3..d337594 100644
--- a/resources/audio_processing/output_data_float.pb.sha1
+++ b/resources/audio_processing/output_data_float.pb.sha1
@@ -1 +1 @@
-1dd2c11da1f1dec49f728881628c1348e07a19cd
\ No newline at end of file
+749efdfd1e3c3ace434b3673dac9ce4938534449
\ No newline at end of file
diff --git a/resources/audio_processing/output_data_float_avx2.pb.sha1 b/resources/audio_processing/output_data_float_avx2.pb.sha1
index 54a5b06..79a95ef 100644
--- a/resources/audio_processing/output_data_float_avx2.pb.sha1
+++ b/resources/audio_processing/output_data_float_avx2.pb.sha1
@@ -1 +1 @@
-16e9d8f3b8b6c23b2b5100a1162acfe67acc37a7
\ No newline at end of file
+78c1a84de332173863c997538aa19b8cdcba5020
\ No newline at end of file