Transparency improvements in the echo canceller 3
This CL adds two changes:
-Adaptive adjustment of the echo suppression to cover both the case
when the echo path covers the room well and the case when it does not.
-Identification of the case when the echo is too low to be audible
and adaptive handling of this case in the echo suppression.
BUG=webrtc:7519, webrtc:7956,webrtc:7957
Review-Url: https://codereview.webrtc.org/2974583004
Cr-Commit-Position: refs/heads/master@{#18962}
diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
index b372df5..43cc901 100644
--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
+++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
@@ -25,22 +25,6 @@
namespace webrtc {
-namespace {
-
-// Constrains the a partiton of the frequency domain filter to be limited in
-// time via setting the relevant time-domain coefficients to zero.
-void Constrain(const Aec3Fft& fft, FftData* H) {
- std::array<float, kFftLength> h;
- fft.Ifft(*H, &h);
- constexpr float kScale = 1.0f / kFftLengthBy2;
- std::for_each(h.begin(), h.begin() + kFftLengthBy2,
- [kScale](float& a) { a *= kScale; });
- std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
- fft.Fft(&h, H);
-}
-
-} // namespace
-
namespace aec3 {
// Computes and stores the frequency response of the filter.
@@ -434,6 +418,7 @@
H2_(size_partitions, std::array<float, kFftLengthBy2Plus1>()) {
RTC_DCHECK(data_dumper_);
+ h_.fill(0.f);
for (auto& H_j : H_) {
H_j.Clear();
}
@@ -446,6 +431,7 @@
AdaptiveFirFilter::~AdaptiveFirFilter() = default;
void AdaptiveFirFilter::HandleEchoPathChange() {
+ h_.fill(0.f);
for (auto& H_j : H_) {
H_j.Clear();
}
@@ -493,10 +479,7 @@
}
// Constrain the filter partitions in a cyclic manner.
- Constrain(fft_, &H_[partition_to_constrain_]);
- partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1)
- ? partition_to_constrain_ + 1
- : 0;
+ Constrain();
// Update the frequency response and echo return loss for the filter.
switch (optimization_) {
@@ -518,4 +501,25 @@
}
}
+// Constrains one partition of the frequency-domain filter to be limited in
+// time by setting the relevant time-domain coefficients to zero. One partition
+// is handled per call, in a cyclic manner, and the constrained time-domain
+// coefficients of that partition are stored in h_.
+void AdaptiveFirFilter::Constrain() {
+  // h_ has a fixed size whereas H_ is a runtime-sized vector, so verify that
+  // the coefficients of this partition fit before writing them into h_.
+  RTC_DCHECK_LE((partition_to_constrain_ + 1) * kFftLengthBy2, h_.size());
+
+  std::array<float, kFftLength> h;
+  fft_.Ifft(H_[partition_to_constrain_], &h);
+
+  // Scale the first half of the time-domain data and zero the second half.
+  constexpr float kScale = 1.0f / kFftLengthBy2;
+  std::for_each(h.begin(), h.begin() + kFftLengthBy2,
+                [kScale](float& a) { a *= kScale; });
+  std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
+
+  // Store the constrained coefficients as this partition's part of the
+  // impulse response estimate.
+  std::copy(h.begin(), h.begin() + kFftLengthBy2,
+            h_.begin() + partition_to_constrain_ * kFftLengthBy2);
+
+  fft_.Fft(&h, &H_[partition_to_constrain_]);
+
+  // Advance to the next partition, wrapping around after the last one.
+  partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1)
+                                ? partition_to_constrain_ + 1
+                                : 0;
+}
+
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
index d77cbca..6fae158 100644
--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
+++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
@@ -119,6 +119,12 @@
return H2_;
}
+ // Returns the estimate of the impulse response. The estimate holds
+ // kFftLengthBy2 time-domain coefficients per filter partition, and the part
+ // belonging to a partition is refreshed when that partition is constrained.
+ const std::array<float, kAdaptiveFilterTimeDomainLength>&
+ FilterImpulseResponse() const {
+ return h_;
+ }
+
void DumpFilter(const char* name) {
for (auto& H : H_) {
data_dumper_->DumpRaw(name, H.re);
@@ -127,11 +133,15 @@
}
private:
+ // Constrains the filter partitions in a cyclic manner, one partition per
+ // call, and stores the constrained time-domain coefficients in h_.
+ void Constrain();
+
ApmDataDumper* const data_dumper_;
const Aec3Fft fft_;
const Aec3Optimization optimization_;
std::vector<FftData> H_;
std::vector<std::array<float, kFftLengthBy2Plus1>> H2_;
+ std::array<float, kAdaptiveFilterTimeDomainLength> h_;
std::array<float, kFftLengthBy2Plus1> erl_;
size_t partition_to_constrain_ = 0;
diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index 474174c..32b20a4 100644
--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -308,7 +308,8 @@
AecState aec_state(0.f);
RenderSignalAnalyzer render_signal_analyzer;
std::vector<float> e(kBlockSize, 0.f);
- std::array<float, kFftLength> s;
+ std::array<float, kFftLength> s_scratch;
+ std::array<float, kBlockSize> s;
FftData S;
FftData G;
FftData E;
@@ -348,20 +349,24 @@
render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
filter.Filter(render_buffer, &S);
- fft.Ifft(S, &s);
- std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e.begin(),
+ fft.Ifft(S, &s_scratch);
+ std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+ e.begin(),
[&](float a, float b) { return a - b * kScale; });
std::for_each(e.begin(), e.end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
fft.ZeroPaddedFft(e, &E);
+ for (size_t k = 0; k < kBlockSize; ++k) {
+ s[k] = kScale * s_scratch[k + kFftLengthBy2];
+ }
gain.Compute(render_buffer, render_signal_analyzer, E,
filter.SizePartitions(), false, &G);
filter.Adapt(render_buffer, G);
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(filter.FilterFrequencyResponse(),
- rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
- x[0], false);
+ filter.FilterImpulseResponse(), rtc::Optional<size_t>(),
+ render_buffer, E2_main, Y2, x[0], s, false);
}
// Verify that the filter is able to perform well.
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
diff --git a/webrtc/modules/audio_processing/aec3/aec3_common.h b/webrtc/modules/audio_processing/aec3/aec3_common.h
index 04b86e9..e6cabb4 100644
--- a/webrtc/modules/audio_processing/aec3/aec3_common.h
+++ b/webrtc/modules/audio_processing/aec3/aec3_common.h
@@ -33,14 +33,16 @@
constexpr int kMetricsCollectionBlocks =
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
-constexpr int kAdaptiveFilterLength = 12;
-constexpr int kResidualEchoPowerRenderWindowSize = 30;
-
constexpr size_t kFftLengthBy2 = 64;
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
constexpr size_t kFftLength = 2 * kFftLengthBy2;
+constexpr int kAdaptiveFilterLength = 12;
+constexpr int kResidualEchoPowerRenderWindowSize = 30;
+// Number of coefficients in the time-domain impulse response of the adaptive
+// filter, i.e. kAdaptiveFilterLength chunks of kFftLengthBy2 coefficients.
+constexpr int kAdaptiveFilterTimeDomainLength =
+ kAdaptiveFilterLength * kFftLengthBy2;
+
constexpr size_t kMaxNumBands = 3;
constexpr size_t kSubFrameLength = 80;
diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc
index 3840ef9..aa389c8 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state.cc
+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc
@@ -78,11 +78,11 @@
int AecState::instance_count_ = 0;
-AecState::AecState(float echo_decay)
+AecState::AecState(float reverb_decay)
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
echo_path_change_counter_(kEchoPathChangeCounterInitial),
- echo_decay_factor_(echo_decay) {}
+ reverb_decay_(reverb_decay) {}
AecState::~AecState() = default;
@@ -111,12 +111,18 @@
void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
adaptive_filter_frequency_response,
+ const std::array<float, kAdaptiveFilterTimeDomainLength>&
+ adaptive_filter_impulse_response,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& Y2,
rtc::ArrayView<const float> x,
+ const std::array<float, kBlockSize>& s,
bool echo_leakage_detected) {
+ // Update the echo audibility evaluator.
+ echo_audibility_.Update(x, s);
+
// Store input parameters.
echo_leakage_detected_ = echo_leakage_detected;
@@ -179,6 +185,126 @@
!external_delay_ && !filter_delay_ &&
(!render_received_ ||
blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);
+
+ // Update the room reverb estimate.
+ UpdateReverb(adaptive_filter_impulse_response);
+}
+
+// Updates the estimate of the room reverb decay (reverb_decay_) from the
+// impulse response of the adaptive filter.
+//
+// Each call evaluates one candidate per-sample decay (reverb_decay_to_test_,
+// swept from 0.9 up to 0.9965) by measuring how well an exponential decay
+// anchored at the impulse response peak matches the squared impulse response
+// after the peak. When the sweep is complete, the candidate with the smallest
+// residual is converted to a per-block decay and adopted.
+//
+// Nothing is done when no filter delay is available, when the linear estimate
+// is not usable, or when the filter delay exceeds kAdaptiveFilterLength - 4.
+void AecState::UpdateReverb(
+    const std::array<float, kAdaptiveFilterTimeDomainLength>&
+        impulse_response) {
+  if ((!(filter_delay_ && usable_linear_estimate_)) ||
+      (*filter_delay_ > kAdaptiveFilterLength - 4)) {
+    return;
+  }
+
+  // Form the data to match against by squaring the impulse response
+  // coefficients.
+  std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;
+  std::transform(impulse_response.begin(), impulse_response.end(),
+                 matching_data.begin(), [](float a) { return a * a; });
+
+  // Avoid matching against noise in the model by subtracting an estimate of the
+  // model noise power, taken as the largest squared coefficient among the last
+  // kTailLength coefficients.
+  constexpr size_t kTailLength = 64;
+  constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;
+  const float tail_power = *std::max_element(matching_data.begin() + tail_index,
+                                             matching_data.end());
+  std::for_each(matching_data.begin(), matching_data.begin() + tail_index,
+                [tail_power](float& a) { a = std::max(0.f, a - tail_power); });
+
+  // Identify the peak index of the impulse response. std::max_element returns
+  // an iterator, so the index is its offset from the start of the data; the
+  // element it points to is a power value, not a position.
+  const size_t peak_index = static_cast<size_t>(
+      std::max_element(matching_data.begin(),
+                       matching_data.begin() + tail_index) -
+      matching_data.begin());
+
+  // Only evaluate the candidate when there is room for a decaying tail after
+  // the peak and the start of that tail is above the noise floor. Otherwise
+  // the residual would trivially be zero and the candidate would always win.
+  const size_t start_index = peak_index + 64;
+  if (peak_index + 128 < tail_index && matching_data[start_index + 1] != 0.f) {
+    // Compute the matching residual error for the current candidate to match.
+    float residual_sqr_sum = 0.f;
+    float d_k = reverb_decay_to_test_;
+    for (size_t k = start_index; k < tail_index; ++k) {
+      const float residual = matching_data[k] - matching_data[peak_index] * d_k;
+      residual_sqr_sum += residual * residual;
+      d_k *= reverb_decay_to_test_;
+    }
+
+    // If needed, update the best candidate for the reverb decay.
+    if (reverb_decay_candidate_residual_ < 0.f ||
+        residual_sqr_sum < reverb_decay_candidate_residual_) {
+      reverb_decay_candidate_residual_ = residual_sqr_sum;
+      reverb_decay_candidate_ = reverb_decay_to_test_;
+    }
+  }
+
+  // Compute the next reverb candidate to evaluate. The step size makes a full
+  // sweep over the candidates take about 5 * kNumBlocksPerSecond calls.
+  reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond);
+
+  // If all reverb candidates have been evaluated, choose the best one as the
+  // reverb decay.
+  if (reverb_decay_to_test_ >= 0.9965f) {
+    // A negative residual means that no candidate was evaluated during the
+    // sweep; only adopt the candidate when at least one evaluation was made.
+    if (reverb_decay_candidate_residual_ >= 0.f) {
+      // Transform the decay to be in the unit of blocks.
+      reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2);
+
+      // Limit the estimated reverb_decay_ to the maximum one needed in practice
+      // to minimize the impact of incorrect estimates.
+      reverb_decay_ = std::min(0.8f, reverb_decay_);
+    }
+    reverb_decay_to_test_ = 0.9f;
+    reverb_decay_candidate_residual_ = -1.f;
+  }
+
+  // For noisy impulse responses, assume a fixed tail length.
+  if (tail_power > 0.0005f) {
+    reverb_decay_ = 0.7f;
+  }
+  data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_);
+  data_dumper_->DumpRaw("aec3_tail_power", tail_power);
+}
+
+void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
+                                      const std::array<float, kBlockSize>& s) {
+  // Returns the largest absolute sample value in [first, last).
+  const auto peak_magnitude = [](const float* first,
+                                 const float* last) -> float {
+    const auto extremes = std::minmax_element(first, last);
+    return std::max(std::abs(*extremes.first), std::abs(*extremes.second));
+  };
+  const float farend_peak = peak_magnitude(x.data(), x.data() + x.size());
+  const float echo_peak = peak_magnitude(s.data(), s.data() + s.size());
+
+  // Count the number of consecutive blocks for which the level of x is very
+  // low.
+  low_farend_counter_ = farend_peak < 5.f ? low_farend_counter_ + 1 : 0;
+
+  // The echo is considered inaudible in two cases:
+  // 1) The echo estimate is at the level of the quantization noise in the FFTs
+  //    while the nearend level is strong enough to mask it, which ensures that
+  //    the playout and AGC gains do not boost any residual echo that is below
+  //    the quantization noise level.
+  // 2) The render signal has been very close to zero for a while.
+  const bool masked_echo = max_nearend_ > 500 && echo_peak < 30.f;
+  const bool silent_farend = low_farend_counter_ > 20;
+  inaudible_echo_ = masked_echo || silent_farend;
+}
+
+void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) {
+  // Largest absolute sample value of the block.
+  const float highest = *std::max_element(e.begin(), e.end());
+  const float lowest = *std::min_element(e.begin(), e.end());
+  const float peak = std::max(std::abs(highest), std::abs(lowest));
+
+  // A new maximum replaces the tracked one and restarts the hold counter.
+  if (peak > max_nearend_) {
+    max_nearend_ = peak;
+    max_nearend_counter_ = 0;
+    return;
+  }
+
+  // Once the maximum has been held for more than 5 * kNumBlocksPerSecond
+  // calls, let it decay on every call.
+  ++max_nearend_counter_;
+  if (max_nearend_counter_ > 5 * kNumBlocksPerSecond) {
+    max_nearend_ *= 0.995f;
+  }
}
} // namespace webrtc
diff --git a/webrtc/modules/audio_processing/aec3/aec_state.h b/webrtc/modules/audio_processing/aec3/aec_state.h
index 1b00bf5..5192a92 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state.h
+++ b/webrtc/modules/audio_processing/aec3/aec_state.h
@@ -31,7 +31,7 @@
// Handles the state and the conditions for the echo removal functionality.
class AecState {
public:
- explicit AecState(float echo_decay);
+ explicit AecState(float reverb_decay);
~AecState();
// Returns whether the linear filter estimate is usable.
@@ -78,23 +78,50 @@
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
// Returns the decay factor for the echo reverberation.
- // TODO(peah): Make this adaptive.
- float ReverbDecayFactor() const { return echo_decay_factor_; }
+ float ReverbDecay() const { return reverb_decay_; }
// Returns whether the echo suppression gain should be forced to zero.
bool ForcedZeroGain() const { return force_zero_gain_; }
+ // Returns whether the echo in the capture signal is deemed to be inaudible.
+ bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); }
+
+ // Updates the aec state with the AEC output signal. The signal is forwarded
+ // to the echo audibility evaluator.
+ void UpdateWithOutput(rtc::ArrayView<const float> e) {
+ echo_audibility_.UpdateWithOutput(e);
+ }
+
// Updates the aec state.
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
adaptive_filter_frequency_response,
+ const std::array<float, kAdaptiveFilterTimeDomainLength>&
+ adaptive_filter_impulse_response,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
const std::array<float, kFftLengthBy2Plus1>& Y2,
rtc::ArrayView<const float> x,
+ const std::array<float, kBlockSize>& s_main,
bool echo_leakage_detected);
private:
+ // Evaluates whether the echo is deemed to be inaudible.
+ class EchoAudibility {
+ public:
+ // Updates the inaudibility decision using the peak absolute values of x and
+ // of the echo estimate s.
+ void Update(rtc::ArrayView<const float> x,
+ const std::array<float, kBlockSize>& s);
+ // Updates the tracked maximum level using the peak absolute value of e.
+ void UpdateWithOutput(rtc::ArrayView<const float> e);
+ // Returns the decision made by the latest call to Update().
+ bool InaudibleEcho() const { return inaudible_echo_; }
+
+ private:
+ // Largest peak absolute value seen by UpdateWithOutput(); it is decayed once
+ // it has not been exceeded for a number of calls.
+ float max_nearend_ = 0.f;
+ // Number of UpdateWithOutput() calls since max_nearend_ was last exceeded.
+ size_t max_nearend_counter_ = 0;
+ // Number of consecutive Update() calls for which the level of x was low.
+ size_t low_farend_counter_ = 0;
+ bool inaudible_echo_ = false;
+ };
+
+ void UpdateReverb(const std::array<float, kAdaptiveFilterTimeDomainLength>&
+ impulse_response);
+
static int instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_;
ErlEstimator erl_estimator_;
@@ -113,7 +140,12 @@
rtc::Optional<size_t> filter_delay_;
rtc::Optional<size_t> external_delay_;
size_t blocks_since_last_saturation_ = 1000;
- const float echo_decay_factor_;
+ // Current reverb decay, as returned by ReverbDecay().
+ float reverb_decay_;
+ // Decay candidate that UpdateReverb() evaluates next.
+ float reverb_decay_to_test_ = 0.9f;
+ // Best decay candidate found so far in the current sweep over candidates.
+ float reverb_decay_candidate_ = 0.f;
+ // Matching residual of the best candidate. A negative value means that no
+ // candidate has been evaluated yet in the current sweep.
+ float reverb_decay_candidate_residual_ = -1.f;
+ // Evaluates whether the echo is deemed to be inaudible.
+ EchoAudibility echo_audibility_;
+
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AecState);
};
diff --git a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
index 682126e..7062d24 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -25,6 +25,8 @@
std::array<float, kFftLengthBy2Plus1> Y2 = {};
std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
EchoPathVariability echo_path_variability(false, false);
+ std::array<float, kBlockSize> s;
+ s.fill(100.f);
std::vector<std::array<float, kFftLengthBy2Plus1>>
converged_filter_frequency_response(10);
@@ -36,47 +38,57 @@
converged_filter_frequency_response[2].fill(100.f);
converged_filter_frequency_response[2][0] = 1.f;
+ std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+ impulse_response.fill(0.f);
+
// Verify that linear AEC usability is false when the filter is diverged and
// there is no external delay reported.
- state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(diverged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s,
+ false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that linear AEC usability is true when the filter is converged
std::fill(x[0].begin(), x[0].end(), 101.f);
for (int k = 0; k < 3000; ++k) {
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
}
EXPECT_TRUE(state.UsableLinearEstimate());
// Verify that linear AEC usability becomes false after an echo path change is
// reported
state.HandleEchoPathChange(EchoPathVariability(true, false));
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that the active render detection works as intended.
std::fill(x[0].begin(), x[0].end(), 101.f);
state.HandleEchoPathChange(EchoPathVariability(true, true));
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
EXPECT_FALSE(state.ActiveRender());
for (int k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
}
EXPECT_TRUE(state.ActiveRender());
// Verify that echo leakage is properly reported.
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
EXPECT_FALSE(state.EchoLeakageDetected());
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], true);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ true);
EXPECT_TRUE(state.EchoLeakageDetected());
// Verify that the ERL is properly estimated
@@ -91,8 +103,9 @@
Y2.fill(10.f * 10000.f * 10000.f);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
@@ -107,8 +120,9 @@
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(10.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
{
@@ -127,8 +141,9 @@
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(5.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- render_buffer, E2_main, Y2, x[0], false);
+ state.Update(converged_filter_frequency_response, impulse_response,
+ rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
+ false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
@@ -155,6 +170,8 @@
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
EchoPathVariability echo_path_variability(false, false);
+ std::array<float, kBlockSize> s;
+ s.fill(100.f);
x.fill(0.f);
std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30);
@@ -162,10 +179,13 @@
v.fill(0.01f);
}
+ std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+ impulse_response.fill(0.f);
+
// Verify that a non-significant filter delay is identified correctly.
state.HandleEchoPathChange(echo_path_variability);
- state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
- E2_main, Y2, x, false);
+ state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
+ render_buffer, E2_main, Y2, x, s, false);
EXPECT_FALSE(state.FilterDelay());
}
@@ -179,11 +199,16 @@
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
EchoPathVariability echo_path_variability(false, false);
+ std::array<float, kBlockSize> s;
+ s.fill(100.f);
x.fill(0.f);
std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(
kFilterLength);
+ std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+ impulse_response.fill(0.f);
+
// Verify that the filter delay for a converged filter is properly identified.
for (int k = 0; k < kFilterLength; ++k) {
for (auto& v : frequency_response) {
@@ -192,8 +217,8 @@
frequency_response[k].fill(100.f);
frequency_response[k][0] = 0.f;
state.HandleEchoPathChange(echo_path_variability);
- state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
- E2_main, Y2, x, false);
+ state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
+ render_buffer, E2_main, Y2, x, s, false);
EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
if (k != (kFilterLength - 1)) {
EXPECT_EQ(k, state.FilterDelay());
@@ -208,6 +233,8 @@
std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
+ std::array<float, kBlockSize> s;
+ s.fill(100.f);
E2_main.fill(0.f);
E2_shadow.fill(0.f);
Y2.fill(0.f);
@@ -219,10 +246,14 @@
v.fill(0.01f);
}
+ std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
+ impulse_response.fill(0.f);
+
for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
state.HandleEchoPathChange(EchoPathVariability(false, false));
- state.Update(frequency_response, rtc::Optional<size_t>(k * kBlockSize + 5),
- render_buffer, E2_main, Y2, x, false);
+ state.Update(frequency_response, impulse_response,
+ rtc::Optional<size_t>(k * kBlockSize + 5), render_buffer,
+ E2_main, Y2, x, s, false);
EXPECT_TRUE(state.ExternalDelay());
EXPECT_EQ(k, state.ExternalDelay());
}
@@ -230,8 +261,8 @@
// Verify that the externally reported delay is properly unset when it is no
// longer present.
state.HandleEchoPathChange(EchoPathVariability(false, false));
- state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
- E2_main, Y2, x, false);
+ state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
+ render_buffer, E2_main, Y2, x, s, false);
EXPECT_FALSE(state.ExternalDelay());
}
diff --git a/webrtc/modules/audio_processing/aec3/echo_remover.cc b/webrtc/modules/audio_processing/aec3/echo_remover.cc
index 842c385..64ffbad 100644
--- a/webrtc/modules/audio_processing/aec3/echo_remover.cc
+++ b/webrtc/modules/audio_processing/aec3/echo_remover.cc
@@ -131,6 +131,8 @@
LowestBandRate(sample_rate_hz_), 1);
data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0],
LowestBandRate(sample_rate_hz_), 1);
+ data_dumper_->DumpRaw("aec3_echo_remover_capture_input", y0);
+ data_dumper_->DumpRaw("aec3_echo_remover_render_input", x0);
aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
@@ -167,13 +169,15 @@
// Update the AEC state information.
aec_state_.Update(subtractor_.FilterFrequencyResponse(),
+ subtractor_.FilterImpulseResponse(),
echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
- echo_leakage_detected_);
+ subtractor_output.s_main, echo_leakage_detected_);
// Choose the linear output.
output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
LowestBandRate(sample_rate_hz_), 1);
+ data_dumper_->DumpRaw("aec3_output_linear", y0);
const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;
// Estimate the residual echo power.
@@ -194,7 +198,14 @@
// Update the metrics.
metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);
+ // Update the aec state with the aec output characteristics.
+ aec_state_.UpdateWithOutput(y0);
+
// Debug outputs for the purpose of development and analysis.
+ data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
+ &subtractor_output.s_main[0],
+ LowestBandRate(sample_rate_hz_), 1);
+ data_dumper_->DumpRaw("aec3_output", y0);
data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum());
data_dumper_->DumpRaw("aec3_suppressor_gain", G);
data_dumper_->DumpWav("aec3_output",
diff --git a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index fc33e12..6e8a80b 100644
--- a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -55,7 +55,8 @@
std::vector<float> y(kBlockSize, 0.f);
AecState aec_state(0.f);
RenderSignalAnalyzer render_signal_analyzer;
- std::array<float, kFftLength> s;
+ std::array<float, kFftLength> s_scratch;
+ std::array<float, kBlockSize> s;
FftData S;
FftData G;
SubtractorOutput output;
@@ -96,18 +97,21 @@
// Apply the main filter.
main_filter.Filter(render_buffer, &S);
- fft.Ifft(S, &s);
- std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
+ fft.Ifft(S, &s_scratch);
+ std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
e_main.begin(),
[&](float a, float b) { return a - b * kScale; });
std::for_each(e_main.begin(), e_main.end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
fft.ZeroPaddedFft(e_main, &E_main);
+ for (size_t k = 0; k < kBlockSize; ++k) {
+ s[k] = kScale * s_scratch[k + kFftLengthBy2];
+ }
// Apply the shadow filter.
shadow_filter.Filter(render_buffer, &S);
- fft.Ifft(S, &s);
- std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
+ fft.Ifft(S, &s_scratch);
+ std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
e_shadow.begin(),
[&](float a, float b) { return a - b * kScale; });
std::for_each(e_shadow.begin(), e_shadow.end(),
@@ -131,8 +135,9 @@
// Update the delay.
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(main_filter.FilterFrequencyResponse(),
+ main_filter.FilterImpulseResponse(),
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
- false);
+ s, false);
}
std::copy(e_main.begin(), e_main.end(), e_last_block->begin());
diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
index 6ec00e4..d17afa6 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -111,7 +111,7 @@
const int filter_delay = *aec_state.FilterDelay();
LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
- aec_state.ReverbDecayFactor(), R2);
+ aec_state.ReverbDecay(), R2);
} else {
// Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2;
@@ -142,7 +142,12 @@
AddEchoReverb(*R2, aec_state.SaturatedEcho(),
std::min(static_cast<size_t>(kAdaptiveFilterLength),
delay.value_or(kAdaptiveFilterLength)),
- aec_state.ReverbDecayFactor(), R2);
+ aec_state.ReverbDecay(), R2);
+ }
+
+ // If the echo is deemed inaudible, set the residual echo to zero.
+ if (aec_state.InaudibleEcho()) {
+ R2->fill(0.f);
}
// If the echo is saturated, estimate the echo power as the maximum echo power
diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index b28cf51..b448c4d 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -52,6 +52,7 @@
Random random_generator(42U);
FftData X;
std::array<float, kBlockSize> x_old;
+ std::array<float, kBlockSize> s;
Aec3Fft fft;
for (auto& H2_k : H2) {
@@ -60,6 +61,11 @@
H2[2].fill(10.f);
H2[2][0] = 0.1f;
+ std::array<float, kAdaptiveFilterTimeDomainLength> h;
+ h.fill(0.f);
+
+ s.fill(100.f);
+
constexpr float kLevel = 10.f;
E2_shadow.fill(kLevel);
E2_main.fill(kLevel);
@@ -74,8 +80,8 @@
render_buffer.Insert(x);
aec_state.HandleEchoPathChange(echo_path_variability);
- aec_state.Update(H2, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2,
- x[0], false);
+ aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main,
+ Y2, x[0], s, false);
estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2);
}
diff --git a/webrtc/modules/audio_processing/aec3/subtractor.cc b/webrtc/modules/audio_processing/aec3/subtractor.cc
index a7bf84d..20ba510 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor.cc
+++ b/webrtc/modules/audio_processing/aec3/subtractor.cc
@@ -25,15 +25,22 @@
const FftData& S,
rtc::ArrayView<const float> y,
std::array<float, kBlockSize>* e,
- FftData* E) {
- std::array<float, kFftLength> s;
- fft.Ifft(S, &s);
+ FftData* E,
+ std::array<float, kBlockSize>* s) {
+ std::array<float, kFftLength> s_scratch;
+ fft.Ifft(S, &s_scratch);
constexpr float kScale = 1.0f / kFftLengthBy2;
- std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e->begin(),
- [&](float a, float b) { return a - b * kScale; });
+ std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+ e->begin(), [&](float a, float b) { return a - b * kScale; });
std::for_each(e->begin(), e->end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
fft.ZeroPaddedFft(*e, E);
+
+ if (s) {
+ for (size_t k = 0; k < s->size(); ++k) {
+ (*s)[k] = kScale * s_scratch[k + kFftLengthBy2];
+ }
+ }
}
} // namespace
@@ -47,7 +54,7 @@
RTC_DCHECK(data_dumper_);
}
-Subtractor::~Subtractor() {}
+Subtractor::~Subtractor() = default;
void Subtractor::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) {
@@ -76,11 +83,11 @@
// Form the output of the main filter.
main_filter_.Filter(render_buffer, &S);
- PredictionError(fft_, S, y, &e_main, &E_main);
+ PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main);
// Form the output of the shadow filter.
shadow_filter_.Filter(render_buffer, &S);
- PredictionError(fft_, S, y, &e_shadow, &E_shadow);
+ PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
// Compute spectra for future use.
E_main.Spectrum(optimization_, &output->E2_main);
diff --git a/webrtc/modules/audio_processing/aec3/subtractor.h b/webrtc/modules/audio_processing/aec3/subtractor.h
index c194b2c..777e4ff 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor.h
+++ b/webrtc/modules/audio_processing/aec3/subtractor.h
@@ -45,12 +45,18 @@
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
- // Returns the block-wise frequency response of the main adaptive filter.
+ // Returns the block-wise frequency response for the main adaptive filter.
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
FilterFrequencyResponse() const {
return main_filter_.FilterFrequencyResponse();
}
+ // Returns the estimate of the impulse response for the main adaptive filter,
+ // as kAdaptiveFilterTimeDomainLength time-domain coefficients.
+ const std::array<float, kAdaptiveFilterTimeDomainLength>&
+ FilterImpulseResponse() const {
+ return main_filter_.FilterImpulseResponse();
+ }
+
private:
const Aec3Fft fft_;
ApmDataDumper* data_dumper_;
diff --git a/webrtc/modules/audio_processing/aec3/subtractor_output.h b/webrtc/modules/audio_processing/aec3/subtractor_output.h
index e2d23b5..8755047 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor_output.h
+++ b/webrtc/modules/audio_processing/aec3/subtractor_output.h
@@ -20,6 +20,7 @@
// Stores the values being returned from the echo subtractor.
struct SubtractorOutput {
+ std::array<float, kBlockSize> s_main;
std::array<float, kBlockSize> e_main;
std::array<float, kBlockSize> e_shadow;
FftData E_main;
@@ -27,6 +28,7 @@
std::array<float, kFftLengthBy2Plus1> E2_shadow;
void Reset() {
+ s_main.fill(0.f);
e_main.fill(0.f);
e_shadow.fill(0.f);
E_main.re.fill(0.f);
diff --git a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
index a5e2a4e..32fc054 100644
--- a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -68,8 +68,9 @@
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(subtractor.FilterFrequencyResponse(),
+ subtractor.FilterImpulseResponse(),
rtc::Optional<size_t>(delay_samples / kBlockSize),
- render_buffer, E2_main, Y2, x[0], false);
+ render_buffer, E2_main, Y2, x[0], output.s_main, false);
}
const float output_power = std::inner_product(