AEC3: Redesign delay headroom
This change reduces the risk of echo due to noise in the headroom
of the linear filter.
Changes:
- Use shorter delay headroom
- Delay headroom is specified in samples (not blocks)
- No hysteresis limit when delay is reduced
Bug: chromium:119942,webrtc:10341
Change-Id: I708e80e26d541dff8ca04b6da2d346a1d59cbfcb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/126420
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Per Ã…hgren <peah@webrtc.org>
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27126}
diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc
index c3053a4..36bb6fd 100644
--- a/api/audio/echo_canceller3_config.cc
+++ b/api/audio/echo_canceller3_config.cc
@@ -87,16 +87,11 @@
c->delay.down_sampling_factor = 4;
res = false;
}
- if (c->delay.delay_headroom_blocks <= 1 &&
- c->delay.hysteresis_limit_2_blocks == 1) {
- c->delay.hysteresis_limit_2_blocks = 0;
- res = false;
- }
+
res = res & Limit(&c->delay.default_delay, 0, 5000);
res = res & Limit(&c->delay.num_filters, 0, 5000);
- res = res & Limit(&c->delay.delay_headroom_blocks, 0, 5000);
- res = res & Limit(&c->delay.hysteresis_limit_1_blocks, 0, 5000);
- res = res & Limit(&c->delay.hysteresis_limit_2_blocks, 0, 5000);
+ res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
+ res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
@@ -239,12 +234,6 @@
res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);
- if (c->delay.delay_headroom_blocks >
- c->filter.main_initial.length_blocks - 1) {
- c->delay.delay_headroom_blocks = c->filter.main_initial.length_blocks - 1;
- res = false;
- }
-
return res;
}
} // namespace webrtc
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 1115340..4ae87d0 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -37,9 +37,8 @@
size_t default_delay = 5;
size_t down_sampling_factor = 4;
size_t num_filters = 5;
- size_t delay_headroom_blocks = 2;
- size_t hysteresis_limit_1_blocks = 1;
- size_t hysteresis_limit_2_blocks = 1;
+ size_t delay_headroom_samples = 32;
+ size_t hysteresis_limit_blocks = 1;
size_t fixed_capture_delay_samples = 0;
float delay_estimate_smoothing = 0.7f;
float delay_candidate_detection_threshold = 0.2f;
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index da535c1..9aabc2d 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -148,12 +148,10 @@
ReadParam(section, "default_delay", &cfg.delay.default_delay);
ReadParam(section, "down_sampling_factor", &cfg.delay.down_sampling_factor);
ReadParam(section, "num_filters", &cfg.delay.num_filters);
- ReadParam(section, "delay_headroom_blocks",
- &cfg.delay.delay_headroom_blocks);
- ReadParam(section, "hysteresis_limit_1_blocks",
- &cfg.delay.hysteresis_limit_1_blocks);
- ReadParam(section, "hysteresis_limit_2_blocks",
- &cfg.delay.hysteresis_limit_2_blocks);
+ ReadParam(section, "delay_headroom_samples",
+ &cfg.delay.delay_headroom_samples);
+ ReadParam(section, "hysteresis_limit_blocks",
+ &cfg.delay.hysteresis_limit_blocks);
ReadParam(section, "fixed_capture_delay_samples",
&cfg.delay.fixed_capture_delay_samples);
ReadParam(section, "delay_estimate_smoothing",
@@ -344,12 +342,10 @@
ost << "\"down_sampling_factor\": " << config.delay.down_sampling_factor
<< ",";
ost << "\"num_filters\": " << config.delay.num_filters << ",";
- ost << "\"delay_headroom_blocks\": " << config.delay.delay_headroom_blocks
+ ost << "\"delay_headroom_samples\": " << config.delay.delay_headroom_samples
<< ",";
- ost << "\"hysteresis_limit_1_blocks\": "
- << config.delay.hysteresis_limit_1_blocks << ",";
- ost << "\"hysteresis_limit_2_blocks\": "
- << config.delay.hysteresis_limit_2_blocks << ",";
+ ost << "\"hysteresis_limit_blocks\": " << config.delay.hysteresis_limit_blocks
+ << ",";
ost << "\"fixed_capture_delay_samples\": "
<< config.delay.fixed_capture_delay_samples << ",";
ost << "\"delay_estimate_smoothing\": "
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index bd5985a..99791a7 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -251,7 +251,7 @@
}
AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config)
- : delay_headroom_blocks_(config.delay.delay_headroom_blocks) {}
+ : delay_headroom_samples_(config.delay.delay_headroom_samples) {}
void AecState::FilterDelay::Update(
const FilterAnalyzer& filter_analyzer,
@@ -269,7 +269,7 @@
const bool delay_estimator_may_not_have_converged =
blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
if (delay_estimator_may_not_have_converged && external_delay_) {
- filter_delay_blocks_ = delay_headroom_blocks_;
+ filter_delay_blocks_ = delay_headroom_samples_ / kBlockSize;
} else {
filter_delay_blocks_ = filter_analyzer.DelayBlocks();
}
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index f511429..e323b2c 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -192,7 +192,7 @@
size_t blocks_with_proper_filter_adaptation);
private:
- const int delay_headroom_blocks_;
+ const int delay_headroom_samples_;
bool external_delay_reported_ = false;
int filter_delay_blocks_ = 0;
absl::optional<DelayEstimate> external_delay_;
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index f2daf57..8a4d8c2 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -15,6 +15,7 @@
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
+#include "system_wrappers/include/field_trial.h"
namespace webrtc {
@@ -34,6 +35,12 @@
// Method for adjusting config parameter dependencies..
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
EchoCanceller3Config adjusted_cfg = config;
+
+ if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
+ // Two blocks headroom.
+ adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
+ }
+
return adjusted_cfg;
}
diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc
index 6113a9e..5ef4f24 100644
--- a/modules/audio_processing/aec3/erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc
@@ -86,8 +86,8 @@
void GetFilterFreq(std::vector<std::array<float, kFftLengthBy2Plus1>>&
filter_frequency_response,
- size_t delay_headroom_blocks) {
- RTC_DCHECK_GE(filter_frequency_response.size(), delay_headroom_blocks);
+ size_t delay_headroom_samples) {
+ const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize;
for (auto& block_freq_resp : filter_frequency_response) {
block_freq_resp.fill(0.f);
}
@@ -110,7 +110,7 @@
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, 3));
- GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks);
+ GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
ErleEstimator estimator(0, config);
@@ -154,7 +154,7 @@
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, 3));
- GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks);
+ GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
ErleEstimator estimator(0, config);
diff --git a/modules/audio_processing/aec3/render_delay_controller.cc b/modules/audio_processing/aec3/render_delay_controller.cc
index 403c734..4f9fa8e 100644
--- a/modules/audio_processing/aec3/render_delay_controller.cc
+++ b/modules/audio_processing/aec3/render_delay_controller.cc
@@ -46,9 +46,8 @@
private:
static int instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_;
- const int delay_headroom_blocks_;
- const int hysteresis_limit_1_blocks_;
- const int hysteresis_limit_2_blocks_;
+ const int hysteresis_limit_blocks_;
+ const int delay_headroom_samples_;
absl::optional<DelayEstimate> delay_;
EchoPathDelayEstimator delay_estimator_;
RenderDelayControllerMetrics metrics_;
@@ -61,32 +60,22 @@
DelayEstimate ComputeBufferDelay(
const absl::optional<DelayEstimate>& current_delay,
- int delay_headroom_blocks,
- int hysteresis_limit_1_blocks,
- int hysteresis_limit_2_blocks,
+ int hysteresis_limit_blocks,
+ int delay_headroom_samples,
DelayEstimate estimated_delay) {
- // The below division is not exact and the truncation is intended.
- const int echo_path_delay_blocks = estimated_delay.delay >> kBlockSizeLog2;
+ // Subtract delay headroom.
+ const int delay_with_headroom_samples = std::max(
+ static_cast<int>(estimated_delay.delay) - delay_headroom_samples, 0);
// Compute the buffer delay increase required to achieve the desired latency.
- size_t new_delay_blocks =
- std::max(echo_path_delay_blocks - delay_headroom_blocks, 0);
+ size_t new_delay_blocks = delay_with_headroom_samples >> kBlockSizeLog2;
// Add hysteresis.
if (current_delay) {
size_t current_delay_blocks = current_delay->delay;
- if (new_delay_blocks > current_delay_blocks) {
- if (new_delay_blocks <=
- current_delay_blocks + hysteresis_limit_1_blocks) {
- new_delay_blocks = current_delay_blocks;
- }
- } else if (new_delay_blocks < current_delay_blocks) {
- size_t hysteresis_limit = std::max(
- static_cast<int>(current_delay_blocks) - hysteresis_limit_2_blocks,
- 0);
- if (new_delay_blocks >= hysteresis_limit) {
- new_delay_blocks = current_delay_blocks;
- }
+ if (new_delay_blocks > current_delay_blocks &&
+ new_delay_blocks <= current_delay_blocks + hysteresis_limit_blocks) {
+ new_delay_blocks = current_delay_blocks;
}
}
@@ -102,12 +91,9 @@
int sample_rate_hz)
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
- delay_headroom_blocks_(
- static_cast<int>(config.delay.delay_headroom_blocks)),
- hysteresis_limit_1_blocks_(
- static_cast<int>(config.delay.hysteresis_limit_1_blocks)),
- hysteresis_limit_2_blocks_(
- static_cast<int>(config.delay.hysteresis_limit_2_blocks)),
+ hysteresis_limit_blocks_(
+ static_cast<int>(config.delay.hysteresis_limit_blocks)),
+ delay_headroom_samples_(config.delay.delay_headroom_samples),
delay_estimator_(data_dumper_.get(), config),
last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) {
RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
@@ -177,10 +163,9 @@
const bool use_hysteresis =
last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined &&
delay_samples_->quality == DelayEstimate::Quality::kRefined;
- delay_ = ComputeBufferDelay(delay_, delay_headroom_blocks_,
- use_hysteresis ? hysteresis_limit_1_blocks_ : 0,
- use_hysteresis ? hysteresis_limit_2_blocks_ : 0,
- *delay_samples_);
+ delay_ = ComputeBufferDelay(delay_,
+ use_hysteresis ? hysteresis_limit_blocks_ : 0,
+ delay_headroom_samples_, *delay_samples_);
last_delay_estimate_quality_ = delay_samples_->quality;
}
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
index 1a01c6d..6a8d7e3 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
@@ -122,7 +122,7 @@
: min_erle_(config.erle.min),
num_sections_(config.erle.num_sections),
num_blocks_(config.filter.main.length_blocks),
- delay_headroom_blocks_(config.delay.delay_headroom_blocks),
+ delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
band_to_subband_(FormSubbandMap()),
max_erle_(SetMaxErleSubbands(config.erle.max_l,
config.erle.max_h,
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
index 6ce4e97..4e62c94 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
@@ -115,7 +115,7 @@
cfg.filter.main.length_blocks = blocks;
cfg.filter.main_initial.length_blocks =
std::min(cfg.filter.main_initial.length_blocks, blocks);
- cfg.delay.delay_headroom_blocks = delay_headroom;
+ cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize;
cfg.erle.num_sections = num_sections;
if (EchoCanceller3Config::Validate(&cfg)) {
SignalDependentErleEstimator s(cfg);
@@ -137,9 +137,8 @@
EchoCanceller3Config cfg;
cfg.filter.main.length_blocks = 2;
cfg.filter.main_initial.length_blocks = 1;
- cfg.delay.delay_headroom_blocks = 0;
- cfg.delay.hysteresis_limit_1_blocks = 0;
- cfg.delay.hysteresis_limit_2_blocks = 0;
+ cfg.delay.delay_headroom_samples = 0;
+ cfg.delay.hysteresis_limit_blocks = 0;
cfg.erle.num_sections = 2;
EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true);
std::array<float, kFftLengthBy2Plus1> average_erle;
diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc
index 4346ffb..cf41da2 100644
--- a/test/fuzzers/audio_processing_configs_fuzzer.cc
+++ b/test/fuzzers/audio_processing_configs_fuzzer.cc
@@ -29,6 +29,7 @@
"WebRTC-Audio-Agc2ForceExtraSaturationMargin",
"WebRTC-Audio-Agc2ForceInitialSaturationMargin",
"WebRTC-Aec3MinErleDuringOnsetsKillSwitch",
+ "WebRTC-Aec3ShortHeadroomKillSwitch",
};
std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,