AEC3: Avoid overcompensating for render onsets during dominant nearend
The ERLE is used to estimate residual echo for echo suppression. The
ERLE is reduced during far-end offset to avoid echo leakage. When there
is a strong near-end present, this can cause unnecessary transparency loss.
This change adds an ERLE estimation that does not compensate for onsets and
uses it for residual echo estimation when the suppressor considers the near-end to be dominant.
Bug: webrtc:12686
Change-Id: Ida78eeacf1f95c6e62403f86ba3f2ff055898a84
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/215323
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33786}
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 2ccc9ac..8ffc3d9 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -109,6 +109,7 @@
float default_len = 0.83f;
bool echo_can_saturate = true;
bool bounded_erl = false;
+ bool erle_onset_compensation_in_dominant_nearend = false;
} ep_strength;
struct EchoAudibility {
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index 9e15e3a..89256b3 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -253,6 +253,8 @@
ReadParam(section, "default_len", &cfg.ep_strength.default_len);
ReadParam(section, "echo_can_saturate", &cfg.ep_strength.echo_can_saturate);
ReadParam(section, "bounded_erl", &cfg.ep_strength.bounded_erl);
+ ReadParam(section, "erle_onset_compensation_in_dominant_nearend",
+ &cfg.ep_strength.erle_onset_compensation_in_dominant_nearend);
}
if (rtc::GetValueFromJsonObject(aec3_root, "echo_audibility", &section)) {
@@ -542,8 +544,11 @@
ost << "\"echo_can_saturate\": "
<< (config.ep_strength.echo_can_saturate ? "true" : "false") << ",";
ost << "\"bounded_erl\": "
- << (config.ep_strength.bounded_erl ? "true" : "false");
-
+ << (config.ep_strength.bounded_erl ? "true" : "false") << ",";
+ ost << "\"erle_onset_compensation_in_dominant_nearend\": "
+ << (config.ep_strength.erle_onset_compensation_in_dominant_nearend
+ ? "true"
+ : "false");
ost << "},";
ost << "\"echo_audibility\": {";
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index 15f3e17..21cad21 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -294,7 +294,9 @@
data_dumper_->DumpRaw("aec3_active_render", active_render);
data_dumper_->DumpRaw("aec3_erl", Erl());
data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
- data_dumper_->DumpRaw("aec3_erle", Erle()[0]);
+ data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]);
+ data_dumper_->DumpRaw("aec3_erle_onset_compensated",
+ Erle(/*onset_compensated=*/true)[0]);
data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
data_dumper_->DumpRaw("aec3_filter_delay",
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index 22b4fed..125ae83 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -70,8 +70,9 @@
}
// Returns the ERLE.
- rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
- return erle_estimator_.Erle();
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
+ bool onset_compensated) const {
+ return erle_estimator_.Erle(onset_compensated);
}
// Returns the fullband ERLE estimate in log2 units.
diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc
index c9db8bd..6e62a58 100644
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -182,7 +182,7 @@
{
// Note that the render spectrum is built so it does not have energy in
// the odd bands but just in the even bands.
- const auto& erle = state.Erle()[0];
+ const auto& erle = state.Erle(/*onset_compensated=*/true)[0];
EXPECT_EQ(erle[0], erle[1]);
constexpr size_t kLowFrequencyLimit = 32;
for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) {
@@ -210,7 +210,7 @@
ASSERT_TRUE(state.UsableLinearEstimate());
{
- const auto& erle = state.Erle()[0];
+ const auto& erle = state.Erle(/*onset_compensated=*/true)[0];
EXPECT_EQ(erle[0], erle[1]);
constexpr size_t kLowFrequencyLimit = 32;
for (size_t k = 1; k < kLowFrequencyLimit; ++k) {
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 1a83fef..6c177c9 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -406,6 +406,7 @@
if (capture_output_used_) {
// Estimate the residual echo power.
residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
+ suppression_gain_.IsDominantNearend(),
R2);
// Suppressor nearend estimate.
diff --git a/modules/audio_processing/aec3/erle_estimator.cc b/modules/audio_processing/aec3/erle_estimator.cc
index 4d84345..0e3d715 100644
--- a/modules/audio_processing/aec3/erle_estimator.cc
+++ b/modules/audio_processing/aec3/erle_estimator.cc
@@ -52,8 +52,9 @@
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
subtractor_spectra,
const std::vector<bool>& converged_filters) {
- RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(), capture_spectra.size());
- RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(),
+ RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
+ capture_spectra.size());
+ RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
subtractor_spectra.size());
const auto& X2_reverb = avg_render_spectrum_with_reverb;
const auto& Y2 = capture_spectra;
@@ -68,7 +69,9 @@
if (signal_dependent_erle_estimator_) {
signal_dependent_erle_estimator_->Update(
render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
- subband_erle_estimator_.Erle(), converged_filters);
+ subband_erle_estimator_.Erle(/*onset_compensated=*/false),
+ subband_erle_estimator_.Erle(/*onset_compensated=*/true),
+ converged_filters);
}
fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h
index d741cff..cae896e 100644
--- a/modules/audio_processing/aec3/erle_estimator.h
+++ b/modules/audio_processing/aec3/erle_estimator.h
@@ -55,17 +55,18 @@
const std::vector<bool>& converged_filters);
// Returns the most recent subband ERLE estimates.
- rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
+ bool onset_compensated) const {
return signal_dependent_erle_estimator_
- ? signal_dependent_erle_estimator_->Erle()
- : subband_erle_estimator_.Erle();
+ ? signal_dependent_erle_estimator_->Erle(onset_compensated)
+ : subband_erle_estimator_.Erle(onset_compensated);
}
// Returns the subband ERLE that are estimated during onsets (only used for
// testing).
- rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleOnsets()
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
const {
- return subband_erle_estimator_.ErleOnsets();
+ return subband_erle_estimator_.ErleDuringOnsets();
}
// Returns the fullband ERLE estimate.
diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc
index 2a5a98d..6df7142 100644
--- a/modules/audio_processing/aec3/erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc
@@ -178,8 +178,9 @@
estimator.Update(*render_delay_buffer->GetRenderBuffer(),
filter_frequency_response, X2, Y2, E2, converged_filters);
}
- VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
- config.erle.max_l, config.erle.max_h);
+ VerifyErle(estimator.Erle(/*onset_compensated=*/true),
+ std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l,
+ config.erle.max_h);
FormNearendFrame(&x, &X2, E2, Y2);
// Verifies that the ERLE is not immediately decreased during nearend
@@ -190,8 +191,9 @@
estimator.Update(*render_delay_buffer->GetRenderBuffer(),
filter_frequency_response, X2, Y2, E2, converged_filters);
}
- VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
- config.erle.max_l, config.erle.max_h);
+ VerifyErle(estimator.Erle(/*onset_compensated=*/true),
+ std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l,
+ config.erle.max_h);
}
TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) {
@@ -253,7 +255,8 @@
converged_filters);
}
}
- VerifyErleBands(estimator.ErleOnsets(), config.erle.min, config.erle.min);
+ VerifyErleBands(estimator.ErleDuringOnsets(), config.erle.min,
+ config.erle.min);
FormNearendFrame(&x, &X2, E2, Y2);
for (size_t k = 0; k < 1000; k++) {
estimator.Update(*render_delay_buffer->GetRenderBuffer(),
@@ -261,8 +264,9 @@
}
// Verifies that during ne activity, Erle converges to the Erle for
// onsets.
- VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
- config.erle.min, config.erle.min);
+ VerifyErle(estimator.Erle(/*onset_compensated=*/true),
+ std::pow(2.f, estimator.FullbandErleLog2()), config.erle.min,
+ config.erle.min);
}
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 0567b54..0688429 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -45,6 +45,13 @@
return config.default_gain;
}
+bool UseErleOnsetCompensationInDominantNearend(
+ const EchoCanceller3Config::EpStrength& config) {
+ return config.erle_onset_compensation_in_dominant_nearend ||
+ field_trial::IsEnabled(
+ "WebRTC-Aec3UseErleOnsetCompensationInDominantNearend");
+}
+
// Computes the indexes that will be used for computing spectral power over
// the blocks surrounding the delay.
void GetRenderIndexesToAnalyze(
@@ -156,7 +163,9 @@
early_reflections_general_gain_(
GetEarlyReflectionsDefaultModeGain(config_.ep_strength)),
late_reflections_general_gain_(
- GetLateReflectionsDefaultModeGain(config_.ep_strength)) {
+ GetLateReflectionsDefaultModeGain(config_.ep_strength)),
+ erle_onset_compensation_in_dominant_nearend_(
+ UseErleOnsetCompensationInDominantNearend(config_.ep_strength)) {
Reset();
}
@@ -167,6 +176,7 @@
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+ bool dominant_nearend,
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
RTC_DCHECK_EQ(R2.size(), Y2.size());
RTC_DCHECK_EQ(R2.size(), S2_linear.size());
@@ -185,7 +195,9 @@
std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
}
} else {
- LinearEstimate(S2_linear, aec_state.Erle(), R2);
+ const bool onset_compensated =
+ erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend;
+ LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2);
}
AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2);
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index 8fe7a84..9e97776 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -39,6 +39,7 @@
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+ bool dominant_nearend,
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);
private:
@@ -68,6 +69,7 @@
const float late_reflections_transparent_mode_gain_;
const float early_reflections_general_gain_;
const float late_reflections_general_gain_;
+ const bool erle_onset_compensation_in_dominant_nearend_;
std::array<float, kFftLengthBy2Plus1> X2_noise_floor_;
std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_;
ReverbModel echo_reverb_;
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index f184eb8..e80838b 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -100,7 +100,7 @@
output);
estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(),
- S2_linear, Y2, R2);
+ S2_linear, Y2, /*dominant_nearend=*/false, R2);
}
}
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
index 5a3ba6c..a5e7709 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc
@@ -131,7 +131,9 @@
section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_,
num_blocks_,
num_sections_)),
+ use_onset_detection_(config.erle.onset_detection),
erle_(num_capture_channels),
+ erle_onset_compensated_(num_capture_channels),
S2_section_accum_(
num_capture_channels,
std::vector<std::array<float, kFftLengthBy2Plus1>>(num_sections_)),
@@ -154,6 +156,7 @@
void SignalDependentErleEstimator::Reset() {
for (size_t ch = 0; ch < erle_.size(); ++ch) {
erle_[ch].fill(min_erle_);
+ erle_onset_compensated_[ch].fill(min_erle_);
for (auto& erle_estimator : erle_estimators_[ch]) {
erle_estimator.fill(min_erle_);
}
@@ -180,6 +183,8 @@
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+ average_erle_onset_compensated,
const std::vector<bool>& converged_filters) {
RTC_DCHECK_GT(num_sections_, 1);
@@ -202,6 +207,11 @@
[band_to_subband_[k]];
erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor,
min_erle_, max_erle_[band_to_subband_[k]]);
+ if (use_onset_detection_) {
+ erle_onset_compensated_[ch][k] = rtc::SafeClamp(
+ average_erle_onset_compensated[ch][k] * correction_factor,
+ min_erle_, max_erle_[band_to_subband_[k]]);
+ }
}
}
}
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h
index 498e922..6847c1a 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h
@@ -37,8 +37,10 @@
void Reset();
// Returns the Erle per frequency subband.
- rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
- return erle_;
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
+ bool onset_compensated) const {
+ return onset_compensated && use_onset_detection_ ? erle_onset_compensated_
+ : erle_;
}
// Updates the Erle estimate. The Erle that is passed as an input is required
@@ -51,6 +53,8 @@
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+ average_erle_onset_compensated,
const std::vector<bool>& converged_filters);
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
@@ -83,7 +87,9 @@
const std::array<size_t, kFftLengthBy2Plus1> band_to_subband_;
const std::array<float, kSubbands> max_erle_;
const std::vector<size_t> section_boundaries_blocks_;
+ const bool use_onset_detection_;
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
+ std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onset_compensated_;
std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
S2_section_accum_;
std::vector<std::vector<std::array<float, kSubbands>>> erle_estimators_;
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
index f8a4aec..58f56d8 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
@@ -172,7 +172,7 @@
for (size_t n = 0; n < 10; ++n) {
inputs.Update();
s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(),
- inputs.GetY2(), inputs.GetE2(), average_erle,
+ inputs.GetY2(), inputs.GetE2(), average_erle, average_erle,
inputs.GetConvergedFilters());
}
}
@@ -201,7 +201,7 @@
for (size_t n = 0; n < 200; ++n) {
inputs.Update();
s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(),
- inputs.GetY2(), inputs.GetE2(), average_erle,
+ inputs.GetY2(), inputs.GetE2(), average_erle, average_erle,
inputs.GetConvergedFilters());
}
}
diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc
index 6c00091..1e957f2 100644
--- a/modules/audio_processing/aec3/subband_erle_estimator.cc
+++ b/modules/audio_processing/aec3/subband_erle_estimator.cc
@@ -48,7 +48,8 @@
use_min_erle_during_onsets_(EnableMinErleDuringOnsets()),
accum_spectra_(num_capture_channels),
erle_(num_capture_channels),
- erle_onsets_(num_capture_channels),
+ erle_onset_compensated_(num_capture_channels),
+ erle_during_onsets_(num_capture_channels),
coming_onset_(num_capture_channels),
hold_counters_(num_capture_channels) {
Reset();
@@ -57,11 +58,11 @@
SubbandErleEstimator::~SubbandErleEstimator() = default;
void SubbandErleEstimator::Reset() {
- for (auto& erle : erle_) {
- erle.fill(min_erle_);
- }
- for (size_t ch = 0; ch < erle_onsets_.size(); ++ch) {
- erle_onsets_[ch].fill(min_erle_);
+ const size_t num_capture_channels = erle_.size();
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ erle_[ch].fill(min_erle_);
+ erle_onset_compensated_[ch].fill(min_erle_);
+ erle_during_onsets_[ch].fill(min_erle_);
coming_onset_[ch].fill(true);
hold_counters_[ch].fill(0);
}
@@ -80,15 +81,21 @@
DecreaseErlePerBandForLowRenderSignals();
}
- for (auto& erle : erle_) {
+ const size_t num_capture_channels = erle_.size();
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ auto& erle = erle_[ch];
erle[0] = erle[1];
erle[kFftLengthBy2] = erle[kFftLengthBy2 - 1];
+
+ auto& erle_oc = erle_onset_compensated_[ch];
+ erle_oc[0] = erle_oc[1];
+ erle_oc[kFftLengthBy2] = erle_oc[kFftLengthBy2 - 1];
}
}
void SubbandErleEstimator::Dump(
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
- data_dumper->DumpRaw("aec3_erle_onset", ErleOnsets()[0]);
+ data_dumper->DumpRaw("aec3_erle_onset", ErleDuringOnsets()[0]);
}
void SubbandErleEstimator::UpdateBands(
@@ -102,13 +109,16 @@
continue;
}
+ if (accum_spectra_.num_points[ch] != kPointsToAccumulate) {
+ continue;
+ }
+
std::array<float, kFftLengthBy2> new_erle;
std::array<bool, kFftLengthBy2> is_erle_updated;
is_erle_updated.fill(false);
for (size_t k = 1; k < kFftLengthBy2; ++k) {
- if (accum_spectra_.num_points[ch] == kPointsToAccumulate &&
- accum_spectra_.E2[ch][k] > 0.f) {
+ if (accum_spectra_.E2[ch][k] > 0.f) {
new_erle[k] = accum_spectra_.Y2[ch][k] / accum_spectra_.E2[ch][k];
is_erle_updated[k] = true;
}
@@ -120,10 +130,11 @@
if (coming_onset_[ch][k]) {
coming_onset_[ch][k] = false;
if (!use_min_erle_during_onsets_) {
- float alpha = new_erle[k] < erle_onsets_[ch][k] ? 0.3f : 0.15f;
- erle_onsets_[ch][k] = rtc::SafeClamp(
- erle_onsets_[ch][k] +
- alpha * (new_erle[k] - erle_onsets_[ch][k]),
+ float alpha =
+ new_erle[k] < erle_during_onsets_[ch][k] ? 0.3f : 0.15f;
+ erle_during_onsets_[ch][k] = rtc::SafeClamp(
+ erle_during_onsets_[ch][k] +
+ alpha * (new_erle[k] - erle_during_onsets_[ch][k]),
min_erle_, max_erle_[k]);
}
}
@@ -132,15 +143,26 @@
}
}
+ auto update_erle_band = [](float& erle, float new_erle,
+ bool low_render_energy, float min_erle,
+ float max_erle) {
+ float alpha = 0.05f;
+ if (new_erle < erle) {
+ alpha = low_render_energy ? 0.f : 0.1f;
+ }
+ erle =
+ rtc::SafeClamp(erle + alpha * (new_erle - erle), min_erle, max_erle);
+ };
+
for (size_t k = 1; k < kFftLengthBy2; ++k) {
if (is_erle_updated[k]) {
- float alpha = 0.05f;
- if (new_erle[k] < erle_[ch][k]) {
- alpha = accum_spectra_.low_render_energy[ch][k] ? 0.f : 0.1f;
+ const bool low_render_energy = accum_spectra_.low_render_energy[ch][k];
+ update_erle_band(erle_[ch][k], new_erle[k], low_render_energy,
+ min_erle_, max_erle_[k]);
+ if (use_onset_detection_) {
+ update_erle_band(erle_onset_compensated_[ch][k], new_erle[k],
+ low_render_energy, min_erle_, max_erle_[k]);
}
- erle_[ch][k] =
- rtc::SafeClamp(erle_[ch][k] + alpha * (new_erle[k] - erle_[ch][k]),
- min_erle_, max_erle_[k]);
}
}
}
@@ -153,9 +175,11 @@
--hold_counters_[ch][k];
if (hold_counters_[ch][k] <=
(kBlocksForOnsetDetection - kBlocksToHoldErle)) {
- if (erle_[ch][k] > erle_onsets_[ch][k]) {
- erle_[ch][k] = std::max(erle_onsets_[ch][k], 0.97f * erle_[ch][k]);
- RTC_DCHECK_LE(min_erle_, erle_[ch][k]);
+ if (erle_onset_compensated_[ch][k] > erle_during_onsets_[ch][k]) {
+ erle_onset_compensated_[ch][k] =
+ std::max(erle_during_onsets_[ch][k],
+ 0.97f * erle_onset_compensated_[ch][k]);
+ RTC_DCHECK_LE(min_erle_, erle_onset_compensated_[ch][k]);
}
if (hold_counters_[ch][k] <= 0) {
coming_onset_[ch][k] = true;
@@ -167,7 +191,7 @@
}
void SubbandErleEstimator::ResetAccumulatedSpectra() {
- for (size_t ch = 0; ch < erle_onsets_.size(); ++ch) {
+ for (size_t ch = 0; ch < erle_during_onsets_.size(); ++ch) {
accum_spectra_.Y2[ch].fill(0.f);
accum_spectra_.E2[ch].fill(0.f);
accum_spectra_.num_points[ch] = 0;
diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h
index 90363e0..ffed6a5 100644
--- a/modules/audio_processing/aec3/subband_erle_estimator.h
+++ b/modules/audio_processing/aec3/subband_erle_estimator.h
@@ -41,14 +41,16 @@
const std::vector<bool>& converged_filters);
// Returns the ERLE estimate.
- rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
- return erle_;
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
+ bool onset_compensated) const {
+ return onset_compensated && use_onset_detection_ ? erle_onset_compensated_
+ : erle_;
}
// Returns the ERLE estimate at onsets (only used for testing).
- rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleOnsets()
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
const {
- return erle_onsets_;
+ return erle_during_onsets_;
}
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
@@ -82,8 +84,12 @@
const std::array<float, kFftLengthBy2Plus1> max_erle_;
const bool use_min_erle_during_onsets_;
AccumulatedSpectra accum_spectra_;
+ // ERLE without special handling of render onsets.
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
- std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onsets_;
+ // ERLE lowered during render onsets.
+ std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onset_compensated_;
+ // Estimation of ERLE during render onsets.
+ std::vector<std::array<float, kFftLengthBy2Plus1>> erle_during_onsets_;
std::vector<std::array<bool, kFftLengthBy2Plus1>> coming_onset_;
std::vector<std::array<int, kFftLengthBy2Plus1>> hold_counters_;
};
diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h
index e7175c3..d049bae 100644
--- a/modules/audio_processing/aec3/suppression_gain.h
+++ b/modules/audio_processing/aec3/suppression_gain.h
@@ -51,6 +51,10 @@
float* high_bands_gain,
std::array<float, kFftLengthBy2Plus1>* low_band_gain);
+ bool IsDominantNearend() {
+ return dominant_nearend_detector_->IsNearendState();
+ }
+
// Toggles the usage of the initial state.
void SetInitialState(bool state);