Add a specific AEC3 behavior for setups with known clock-drift
TBR=gustaf@webrtc.org
Change-Id: I9c726fc8e1b010255a1bee166c99fe6cb75d7658
Bug: chromium:826655,webrtc:9079
Reviewed-on: https://webrtc-review.googlesource.com/64982
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22657}
diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc
index d74d7a8..805a765 100644
--- a/api/audio/echo_canceller3_config.cc
+++ b/api/audio/echo_canceller3_config.cc
@@ -12,5 +12,7 @@
namespace webrtc {
EchoCanceller3Config::EchoCanceller3Config() = default;
+EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
+ default;
} // namespace webrtc
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 41b26d0..cf45255 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -18,6 +18,7 @@
// Configuration struct for EchoCanceller3
struct EchoCanceller3Config {
EchoCanceller3Config();
+ EchoCanceller3Config(const EchoCanceller3Config& e);
struct Delay {
size_t default_delay = 5;
size_t down_sampling_factor = 4;
@@ -119,6 +120,18 @@
bool has_clock_drift = false;
} echo_removal_control;
+
+ struct EchoModel {
+ size_t noise_floor_hold = 50;
+ float min_noise_floor_power = 1638400.f;
+ float stationary_gate_slope = 10.f;
+ float noise_gate_power = 27509.42f;
+ float noise_gate_slope = 0.3f;
+ size_t render_pre_window_size = 1;
+ size_t render_post_window_size = 3;
+ float nonlinear_hold = 2;
+ float nonlinear_release = 0.1f;
+ } echo_model;
};
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index f0cbbc8..a0cf4f9 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -13,6 +13,7 @@
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomicops.h"
+#include "rtc_base/logging.h"
namespace webrtc {
@@ -29,6 +30,43 @@
return false;
}
+// Method for adjusting config parameter dependencies.
+EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
+ EchoCanceller3Config adjusted_cfg = config;
+
+ // Use customized parameters when the system has clock-drift.
+ if (config.echo_removal_control.has_clock_drift) {
+ RTC_LOG(LS_WARNING)
+ << "Customizing parameters to work well for the clock-drift case.";
+ if (config.ep_strength.bounded_erl) {
+ adjusted_cfg.ep_strength.default_len = 0.85f;
+ adjusted_cfg.ep_strength.lf = 0.01f;
+ adjusted_cfg.ep_strength.mf = 0.01f;
+ adjusted_cfg.ep_strength.hf = 0.01f;
+ adjusted_cfg.echo_model.render_pre_window_size = 1;
+ adjusted_cfg.echo_model.render_post_window_size = 1;
+ adjusted_cfg.echo_model.nonlinear_hold = 3;
+ adjusted_cfg.echo_model.nonlinear_release = 0.001f;
+ } else {
+ adjusted_cfg.ep_strength.bounded_erl = true;
+ adjusted_cfg.delay.down_sampling_factor = 2;
+ adjusted_cfg.ep_strength.default_len = 0.8f;
+ adjusted_cfg.ep_strength.lf = 0.01f;
+ adjusted_cfg.ep_strength.mf = 0.01f;
+ adjusted_cfg.ep_strength.hf = 0.01f;
+ adjusted_cfg.filter.main = {30, 0.1f, 0.8f, 0.001f, 20075344.f};
+ adjusted_cfg.filter.shadow = {30, 0.7f, 20075344.f};
+ adjusted_cfg.filter.main_initial = {30, 0.1f, 1.5f, 0.001f, 20075344.f};
+ adjusted_cfg.filter.shadow_initial = {30, 0.9f, 20075344.f};
+ adjusted_cfg.echo_model.render_pre_window_size = 2;
+ adjusted_cfg.echo_model.render_post_window_size = 2;
+ adjusted_cfg.echo_model.nonlinear_hold = 3;
+ adjusted_cfg.echo_model.nonlinear_release = 0.6f;
+ }
+ }
+ return adjusted_cfg;
+}
+
void FillSubFrameView(AudioBuffer* frame,
size_t sub_frame_index,
std::vector<rtc::ArrayView<float>>* sub_frame_view) {
@@ -209,11 +247,12 @@
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
int sample_rate_hz,
bool use_highpass_filter)
- : EchoCanceller3(config,
- sample_rate_hz,
- use_highpass_filter,
- std::unique_ptr<BlockProcessor>(
- BlockProcessor::Create(config, sample_rate_hz))) {}
+ : EchoCanceller3(
+ AdjustConfig(config),
+ sample_rate_hz,
+ use_highpass_filter,
+ std::unique_ptr<BlockProcessor>(
+ BlockProcessor::Create(AdjustConfig(config), sample_rate_hz))) {}
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
int sample_rate_hz,
bool use_highpass_filter,
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index bf7e427..f534817 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -17,65 +17,6 @@
#include "rtc_base/checks.h"
namespace webrtc {
-namespace {
-
-// Estimates the echo generating signal power as gated maximal power over a time
-// window.
-void EchoGeneratingPower(const RenderBuffer& render_buffer,
- size_t min_delay,
- size_t max_delay,
- std::array<float, kFftLengthBy2Plus1>* X2) {
- X2->fill(0.f);
- for (size_t k = min_delay; k <= max_delay; ++k) {
- std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
- X2->begin(),
- [](float a, float b) { return std::max(a, b); });
- }
-
- // Apply soft noise gate of -78 dBFS.
- static constexpr float kNoiseGatePower = 27509.42f;
- std::for_each(X2->begin(), X2->end(), [](float& a) {
- if (kNoiseGatePower > a) {
- a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
- }
- });
-}
-
-constexpr int kNoiseFloorCounterMax = 50;
-constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f;
-
-// Updates estimate for the power of the stationary noise component in the
-// render signal.
-void RenderNoisePower(
- const RenderBuffer& render_buffer,
- std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
- std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) {
- RTC_DCHECK(X2_noise_floor);
- RTC_DCHECK(X2_noise_floor_counter);
-
- const auto render_power = render_buffer.Spectrum(0);
- RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
- RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
-
- // Estimate the stationary noise power in a minimum statistics manner.
- for (size_t k = 0; k < render_power.size(); ++k) {
- // Decrease rapidly.
- if (render_power[k] < (*X2_noise_floor)[k]) {
- (*X2_noise_floor)[k] = render_power[k];
- (*X2_noise_floor_counter)[k] = 0;
- } else {
- // Increase in a delayed, leaky manner.
- if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) {
- (*X2_noise_floor)[k] =
- std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin);
- } else {
- ++(*X2_noise_floor_counter)[k];
- }
- }
- }
-}
-
-} // namespace
ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config)
: config_(config), S2_old_(config_.filter.main.length_blocks) {
@@ -112,15 +53,21 @@
std::array<float, kFftLengthBy2Plus1> X2;
// Computes the spectral power over the blocks surrounding the delay.
- EchoGeneratingPower(render_buffer,
- std::max(0, aec_state.FilterDelayBlocks() - 1),
- aec_state.FilterDelayBlocks() + 3, &X2);
+ size_t window_start = std::max(
+ 0, aec_state.FilterDelayBlocks() -
+ static_cast<int>(config_.echo_model.render_pre_window_size));
+ size_t window_end =
+ aec_state.FilterDelayBlocks() +
+ static_cast<int>(config_.echo_model.render_post_window_size);
+ EchoGeneratingPower(render_buffer, window_start, window_end, &X2);
// Subtract the stationary noise power to avoid stationary noise causing
// excessive echo suppression.
- std::transform(
- X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
- [](float a, float b) { return std::max(0.f, a - 10.f * b); });
+ std::transform(X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
+ [&](float a, float b) {
+ return std::max(
+ 0.f, a - config_.echo_model.stationary_gate_slope * b);
+ });
NonLinearEstimate(aec_state.SaturatedEcho(), aec_state.EchoPathGain(), X2,
Y2, R2);
@@ -144,8 +91,8 @@
}
void ResidualEchoEstimator::Reset() {
- X2_noise_floor_counter_.fill(kNoiseFloorCounterMax);
- X2_noise_floor_.fill(kNoiseFloorMin);
+ X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold);
+ X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
R2_reverb_.fill(0.f);
R2_old_.fill(0.f);
R2_hold_counter_.fill(0.f);
@@ -186,9 +133,12 @@
// Compute the residual echo by holding a maximum echo powers and an echo
// fading corresponding to a room with an RT60 value of about 50 ms.
- (*R2)[k] = R2_hold_counter_[k] < 2
- ? std::max((*R2)[k], R2_old_[k])
- : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]);
+ (*R2)[k] =
+ R2_hold_counter_[k] < config_.echo_model.nonlinear_hold
+ ? std::max((*R2)[k], R2_old_[k])
+ : std::min(
+ (*R2)[k] + R2_old_[k] * config_.echo_model.nonlinear_release,
+ Y2[k]);
}
}
@@ -233,4 +183,56 @@
std::plus<float>());
}
+void ResidualEchoEstimator::EchoGeneratingPower(
+ const RenderBuffer& render_buffer,
+ size_t min_delay,
+ size_t max_delay,
+ std::array<float, kFftLengthBy2Plus1>* X2) const {
+ X2->fill(0.f);
+ for (size_t k = min_delay; k <= max_delay; ++k) {
+ std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
+ X2->begin(),
+ [](float a, float b) { return std::max(a, b); });
+ }
+
+ // Apply soft noise gate.
+ std::for_each(X2->begin(), X2->end(), [&](float& a) {
+ if (config_.echo_model.noise_gate_power > a) {
+ a = std::max(0.f, a - config_.echo_model.noise_gate_slope *
+ (config_.echo_model.noise_gate_power - a));
+ }
+ });
+}
+
+void ResidualEchoEstimator::RenderNoisePower(
+ const RenderBuffer& render_buffer,
+ std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
+ std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const {
+ RTC_DCHECK(X2_noise_floor);
+ RTC_DCHECK(X2_noise_floor_counter);
+
+ const auto render_power = render_buffer.Spectrum(0);
+ RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
+ RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
+
+ // Estimate the stationary noise power in a minimum statistics manner.
+ for (size_t k = 0; k < render_power.size(); ++k) {
+ // Decrease rapidly.
+ if (render_power[k] < (*X2_noise_floor)[k]) {
+ (*X2_noise_floor)[k] = render_power[k];
+ (*X2_noise_floor_counter)[k] = 0;
+ } else {
+ // Increase in a delayed, leaky manner.
+ if ((*X2_noise_floor_counter)[k] >=
+ static_cast<int>(config_.echo_model.noise_floor_hold)) {
+ (*X2_noise_floor)[k] =
+ std::max((*X2_noise_floor)[k] * 1.1f,
+ config_.echo_model.min_noise_floor_power);
+ } else {
+ ++(*X2_noise_floor_counter)[k];
+ }
+ }
+ }
+}
+
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index 3758114..1222d54 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -61,6 +61,21 @@
size_t delay,
float reverb_decay_factor,
std::array<float, kFftLengthBy2Plus1>* R2);
+
+ // Estimates the echo generating signal power as gated maximal power over a
+ // time window.
+ void EchoGeneratingPower(const RenderBuffer& render_buffer,
+ size_t min_delay,
+ size_t max_delay,
+ std::array<float, kFftLengthBy2Plus1>* X2) const;
+
+ // Updates estimate for the power of the stationary noise component in the
+ // render signal.
+ void RenderNoisePower(
+ const RenderBuffer& render_buffer,
+ std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
+ std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const;
+
const EchoCanceller3Config config_;
std::array<float, kFftLengthBy2Plus1> R2_old_;
std::array<int, kFftLengthBy2Plus1> R2_hold_counter_;
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index f7b538b..d281964 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -248,6 +248,23 @@
&cfg.echo_removal_control.has_clock_drift);
}
+ if (rtc::GetValueFromJsonObject(root, "echo_model", &section)) {
+ Json::Value subsection;
+ ReadParam(section, "noise_floor_hold", &cfg.echo_model.noise_floor_hold);
+ ReadParam(section, "min_noise_floor_power",
+ &cfg.echo_model.min_noise_floor_power);
+ ReadParam(section, "stationary_gate_slope",
+ &cfg.echo_model.stationary_gate_slope);
+ ReadParam(section, "noise_gate_power", &cfg.echo_model.noise_gate_power);
+ ReadParam(section, "noise_gate_slope", &cfg.echo_model.noise_gate_slope);
+ ReadParam(section, "render_pre_window_size",
+ &cfg.echo_model.render_pre_window_size);
+ ReadParam(section, "render_post_window_size",
+ &cfg.echo_model.render_post_window_size);
+ ReadParam(section, "nonlinear_hold", &cfg.echo_model.nonlinear_hold);
+ ReadParam(section, "nonlinear_release", &cfg.echo_model.nonlinear_release);
+ }
+
std::cout << std::endl;
return cfg;
}