AEC3: Redesign delay headroom

This change reduces the risk of echo due to noise in the headroom
of the linear filter.

Changes:
- Use shorter delay headroom
- Delay headroom is specified in samples (not blocks)
- No hysteresis limit when delay is reduced

Bug: chromium:119942,webrtc:10341
Change-Id: I708e80e26d541dff8ca04b6da2d346a1d59cbfcb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/126420
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27126}
diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc
index c3053a4..36bb6fd 100644
--- a/api/audio/echo_canceller3_config.cc
+++ b/api/audio/echo_canceller3_config.cc
@@ -87,16 +87,11 @@
     c->delay.down_sampling_factor = 4;
     res = false;
   }
-  if (c->delay.delay_headroom_blocks <= 1 &&
-      c->delay.hysteresis_limit_2_blocks == 1) {
-    c->delay.hysteresis_limit_2_blocks = 0;
-    res = false;
-  }
+
   res = res & Limit(&c->delay.default_delay, 0, 5000);
   res = res & Limit(&c->delay.num_filters, 0, 5000);
-  res = res & Limit(&c->delay.delay_headroom_blocks, 0, 5000);
-  res = res & Limit(&c->delay.hysteresis_limit_1_blocks, 0, 5000);
-  res = res & Limit(&c->delay.hysteresis_limit_2_blocks, 0, 5000);
+  res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
+  res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
   res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
   res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
   res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
@@ -239,12 +234,6 @@
 
   res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);
 
-  if (c->delay.delay_headroom_blocks >
-      c->filter.main_initial.length_blocks - 1) {
-    c->delay.delay_headroom_blocks = c->filter.main_initial.length_blocks - 1;
-    res = false;
-  }
-
   return res;
 }
 }  // namespace webrtc
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 1115340..4ae87d0 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -37,9 +37,8 @@
     size_t default_delay = 5;
     size_t down_sampling_factor = 4;
     size_t num_filters = 5;
-    size_t delay_headroom_blocks = 2;
-    size_t hysteresis_limit_1_blocks = 1;
-    size_t hysteresis_limit_2_blocks = 1;
+    size_t delay_headroom_samples = 32;
+    size_t hysteresis_limit_blocks = 1;
     size_t fixed_capture_delay_samples = 0;
     float delay_estimate_smoothing = 0.7f;
     float delay_candidate_detection_threshold = 0.2f;
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index da535c1..9aabc2d 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -148,12 +148,10 @@
     ReadParam(section, "default_delay", &cfg.delay.default_delay);
     ReadParam(section, "down_sampling_factor", &cfg.delay.down_sampling_factor);
     ReadParam(section, "num_filters", &cfg.delay.num_filters);
-    ReadParam(section, "delay_headroom_blocks",
-              &cfg.delay.delay_headroom_blocks);
-    ReadParam(section, "hysteresis_limit_1_blocks",
-              &cfg.delay.hysteresis_limit_1_blocks);
-    ReadParam(section, "hysteresis_limit_2_blocks",
-              &cfg.delay.hysteresis_limit_2_blocks);
+    ReadParam(section, "delay_headroom_samples",
+              &cfg.delay.delay_headroom_samples);
+    ReadParam(section, "hysteresis_limit_blocks",
+              &cfg.delay.hysteresis_limit_blocks);
     ReadParam(section, "fixed_capture_delay_samples",
               &cfg.delay.fixed_capture_delay_samples);
     ReadParam(section, "delay_estimate_smoothing",
@@ -344,12 +342,10 @@
   ost << "\"down_sampling_factor\": " << config.delay.down_sampling_factor
       << ",";
   ost << "\"num_filters\": " << config.delay.num_filters << ",";
-  ost << "\"delay_headroom_blocks\": " << config.delay.delay_headroom_blocks
+  ost << "\"delay_headroom_samples\": " << config.delay.delay_headroom_samples
       << ",";
-  ost << "\"hysteresis_limit_1_blocks\": "
-      << config.delay.hysteresis_limit_1_blocks << ",";
-  ost << "\"hysteresis_limit_2_blocks\": "
-      << config.delay.hysteresis_limit_2_blocks << ",";
+  ost << "\"hysteresis_limit_blocks\": " << config.delay.hysteresis_limit_blocks
+      << ",";
   ost << "\"fixed_capture_delay_samples\": "
       << config.delay.fixed_capture_delay_samples << ",";
   ost << "\"delay_estimate_smoothing\": "
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index bd5985a..99791a7 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -251,7 +251,7 @@
 }
 
 AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config)
-    : delay_headroom_blocks_(config.delay.delay_headroom_blocks) {}
+    : delay_headroom_samples_(config.delay.delay_headroom_samples) {}
 
 void AecState::FilterDelay::Update(
     const FilterAnalyzer& filter_analyzer,
@@ -269,7 +269,7 @@
   const bool delay_estimator_may_not_have_converged =
       blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
   if (delay_estimator_may_not_have_converged && external_delay_) {
-    filter_delay_blocks_ = delay_headroom_blocks_;
+    filter_delay_blocks_ = delay_headroom_samples_ / kBlockSize;
   } else {
     filter_delay_blocks_ = filter_analyzer.DelayBlocks();
   }
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index f511429..e323b2c 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -192,7 +192,7 @@
                 size_t blocks_with_proper_filter_adaptation);
 
    private:
-    const int delay_headroom_blocks_;
+    const int delay_headroom_samples_;
     bool external_delay_reported_ = false;
     int filter_delay_blocks_ = 0;
     absl::optional<DelayEstimate> external_delay_;
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index f2daf57..8a4d8c2 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -15,6 +15,7 @@
 #include "modules/audio_processing/aec3/aec3_common.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/atomic_ops.h"
+#include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
 
@@ -34,6 +35,12 @@
 // Method for adjusting config parameter dependencies..
 EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
   EchoCanceller3Config adjusted_cfg = config;
+
+  if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
+    // Two blocks headroom.
+    adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
+  }
+
   return adjusted_cfg;
 }
 
diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc
index 6113a9e..5ef4f24 100644
--- a/modules/audio_processing/aec3/erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc
@@ -86,8 +86,8 @@
 
 void GetFilterFreq(std::vector<std::array<float, kFftLengthBy2Plus1>>&
                        filter_frequency_response,
-                   size_t delay_headroom_blocks) {
-  RTC_DCHECK_GE(filter_frequency_response.size(), delay_headroom_blocks);
+                   size_t delay_headroom_samples) {
+  const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize;
   for (auto& block_freq_resp : filter_frequency_response) {
     block_freq_resp.fill(0.f);
   }
@@ -110,7 +110,7 @@
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
       RenderDelayBuffer::Create(config, 3));
 
-  GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks);
+  GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
 
   ErleEstimator estimator(0, config);
 
@@ -154,7 +154,7 @@
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
       RenderDelayBuffer::Create(config, 3));
 
-  GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks);
+  GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
 
   ErleEstimator estimator(0, config);
 
diff --git a/modules/audio_processing/aec3/render_delay_controller.cc b/modules/audio_processing/aec3/render_delay_controller.cc
index 403c734..4f9fa8e 100644
--- a/modules/audio_processing/aec3/render_delay_controller.cc
+++ b/modules/audio_processing/aec3/render_delay_controller.cc
@@ -46,9 +46,8 @@
  private:
   static int instance_count_;
   std::unique_ptr<ApmDataDumper> data_dumper_;
-  const int delay_headroom_blocks_;
-  const int hysteresis_limit_1_blocks_;
-  const int hysteresis_limit_2_blocks_;
+  const int hysteresis_limit_blocks_;
+  const int delay_headroom_samples_;
   absl::optional<DelayEstimate> delay_;
   EchoPathDelayEstimator delay_estimator_;
   RenderDelayControllerMetrics metrics_;
@@ -61,32 +60,22 @@
 
 DelayEstimate ComputeBufferDelay(
     const absl::optional<DelayEstimate>& current_delay,
-    int delay_headroom_blocks,
-    int hysteresis_limit_1_blocks,
-    int hysteresis_limit_2_blocks,
+    int hysteresis_limit_blocks,
+    int delay_headroom_samples,
     DelayEstimate estimated_delay) {
-  // The below division is not exact and the truncation is intended.
-  const int echo_path_delay_blocks = estimated_delay.delay >> kBlockSizeLog2;
+  // Subtract delay headroom.
+  const int delay_with_headroom_samples = std::max(
+      static_cast<int>(estimated_delay.delay) - delay_headroom_samples, 0);
 
   // Compute the buffer delay increase required to achieve the desired latency.
-  size_t new_delay_blocks =
-      std::max(echo_path_delay_blocks - delay_headroom_blocks, 0);
+  size_t new_delay_blocks = delay_with_headroom_samples >> kBlockSizeLog2;
 
   // Add hysteresis.
   if (current_delay) {
     size_t current_delay_blocks = current_delay->delay;
-    if (new_delay_blocks > current_delay_blocks) {
-      if (new_delay_blocks <=
-          current_delay_blocks + hysteresis_limit_1_blocks) {
-        new_delay_blocks = current_delay_blocks;
-      }
-    } else if (new_delay_blocks < current_delay_blocks) {
-      size_t hysteresis_limit = std::max(
-          static_cast<int>(current_delay_blocks) - hysteresis_limit_2_blocks,
-          0);
-      if (new_delay_blocks >= hysteresis_limit) {
-        new_delay_blocks = current_delay_blocks;
-      }
+    if (new_delay_blocks > current_delay_blocks &&
+        new_delay_blocks <= current_delay_blocks + hysteresis_limit_blocks) {
+      new_delay_blocks = current_delay_blocks;
     }
   }
 
@@ -102,12 +91,9 @@
     int sample_rate_hz)
     : data_dumper_(
           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
-      delay_headroom_blocks_(
-          static_cast<int>(config.delay.delay_headroom_blocks)),
-      hysteresis_limit_1_blocks_(
-          static_cast<int>(config.delay.hysteresis_limit_1_blocks)),
-      hysteresis_limit_2_blocks_(
-          static_cast<int>(config.delay.hysteresis_limit_2_blocks)),
+      hysteresis_limit_blocks_(
+          static_cast<int>(config.delay.hysteresis_limit_blocks)),
+      delay_headroom_samples_(config.delay.delay_headroom_samples),
       delay_estimator_(data_dumper_.get(), config),
       last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) {
   RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
@@ -177,10 +163,9 @@
     const bool use_hysteresis =
         last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined &&
         delay_samples_->quality == DelayEstimate::Quality::kRefined;
-    delay_ = ComputeBufferDelay(delay_, delay_headroom_blocks_,
-                                use_hysteresis ? hysteresis_limit_1_blocks_ : 0,
-                                use_hysteresis ? hysteresis_limit_2_blocks_ : 0,
-                                *delay_samples_);
+    delay_ = ComputeBufferDelay(delay_,
+                                use_hysteresis ? hysteresis_limit_blocks_ : 0,
+                                delay_headroom_samples_, *delay_samples_);
     last_delay_estimate_quality_ = delay_samples_->quality;
   }
 
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
index 1a01c6d..6a8d7e3 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
@@ -122,7 +122,7 @@
     : min_erle_(config.erle.min),
       num_sections_(config.erle.num_sections),
       num_blocks_(config.filter.main.length_blocks),
-      delay_headroom_blocks_(config.delay.delay_headroom_blocks),
+      delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
       band_to_subband_(FormSubbandMap()),
       max_erle_(SetMaxErleSubbands(config.erle.max_l,
                                    config.erle.max_h,
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
index 6ce4e97..4e62c94 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
@@ -115,7 +115,7 @@
         cfg.filter.main.length_blocks = blocks;
         cfg.filter.main_initial.length_blocks =
             std::min(cfg.filter.main_initial.length_blocks, blocks);
-        cfg.delay.delay_headroom_blocks = delay_headroom;
+        cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize;
         cfg.erle.num_sections = num_sections;
         if (EchoCanceller3Config::Validate(&cfg)) {
           SignalDependentErleEstimator s(cfg);
@@ -137,9 +137,8 @@
   EchoCanceller3Config cfg;
   cfg.filter.main.length_blocks = 2;
   cfg.filter.main_initial.length_blocks = 1;
-  cfg.delay.delay_headroom_blocks = 0;
-  cfg.delay.hysteresis_limit_1_blocks = 0;
-  cfg.delay.hysteresis_limit_2_blocks = 0;
+  cfg.delay.delay_headroom_samples = 0;
+  cfg.delay.hysteresis_limit_blocks = 0;
   cfg.erle.num_sections = 2;
   EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true);
   std::array<float, kFftLengthBy2Plus1> average_erle;
diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc
index 4346ffb..cf41da2 100644
--- a/test/fuzzers/audio_processing_configs_fuzzer.cc
+++ b/test/fuzzers/audio_processing_configs_fuzzer.cc
@@ -29,6 +29,7 @@
     "WebRTC-Audio-Agc2ForceExtraSaturationMargin",
     "WebRTC-Audio-Agc2ForceInitialSaturationMargin",
     "WebRTC-Aec3MinErleDuringOnsetsKillSwitch",
+    "WebRTC-Aec3ShortHeadroomKillSwitch",
 };
 
 std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,