Major updates to the echo removal functionality in AEC3
This CL makes fairly significant changes to the echo removal
functionality, the main ones being:
- More centralized control over the echo removal.
- Updated echo suppression gain behavior.
- Significantly increased usage of the linear adaptive filter.
- New echo removal functionality when the linear filter is not usable.
This CL is chained to the CL https://codereview.webrtc.org/2784023002/
BUG=webrtc:6018
Review-Url: https://codereview.webrtc.org/2782423003
Cr-Original-Commit-Position: refs/heads/master@{#17575}
Cr-Mirrored-From: https://chromium.googlesource.com/external/webrtc
Cr-Mirrored-Commit: 86afe9d6618a57e4817e6f9261042e785b7ff0c8
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index 053bf1d..dba581b 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -73,8 +73,6 @@
"aec3/matched_filter_lag_aggregator.h",
"aec3/output_selector.cc",
"aec3/output_selector.h",
- "aec3/power_echo_model.cc",
- "aec3/power_echo_model.h",
"aec3/render_buffer.cc",
"aec3/render_buffer.h",
"aec3/render_delay_buffer.cc",
@@ -591,7 +589,6 @@
"aec3/matched_filter_lag_aggregator_unittest.cc",
"aec3/matched_filter_unittest.cc",
"aec3/output_selector_unittest.cc",
- "aec3/power_echo_model_unittest.cc",
"aec3/render_buffer_unittest.cc",
"aec3/render_delay_buffer_unittest.cc",
"aec3/render_delay_controller_metrics_unittest.cc",
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter.cc b/modules/audio_processing/aec3/adaptive_fir_filter.cc
index 5876239..7f66ce5 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter.cc
@@ -59,42 +59,35 @@
}
}
-// Resets the filter.
-void ResetFilter(rtc::ArrayView<FftData> H) {
- for (auto& H_j : H) {
- H_j.Clear();
- }
-}
-
} // namespace
namespace aec3 {
// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
-void AdaptPartitions(const RenderBuffer& X_buffer,
+void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H) {
- rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
- size_t index = X_buffer.Position();
+ rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
+ size_t index = render_buffer.Position();
for (auto& H_j : H) {
- const FftData& X = X_buffer_data[index];
+ const FftData& X = render_buffer_data[index];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
H_j.re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k];
H_j.im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k];
}
- index = index < (X_buffer_data.size() - 1) ? index + 1 : 0;
+ index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
}
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Adapts the filter partitions. (SSE2 variant)
-void AdaptPartitions_SSE2(const RenderBuffer& X_buffer,
+void AdaptPartitions_SSE2(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H) {
- rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
+ rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
const int lim1 =
- std::min(X_buffer_data.size() - X_buffer.Position(), H.size());
+ std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
FftData* H_j;
@@ -106,7 +99,7 @@
const __m128 G_im = _mm_loadu_ps(&G.im[k]);
H_j = &H[0];
- X = &X_buffer_data[X_buffer.Position()];
+ X = &render_buffer_data[render_buffer.Position()];
limit = lim1;
j = 0;
do {
@@ -127,13 +120,13 @@
_mm_storeu_ps(&H_j->im[k], h);
}
- X = &X_buffer_data[0];
+ X = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
}
H_j = &H[0];
- X = &X_buffer_data[X_buffer.Position()];
+ X = &render_buffer_data[render_buffer.Position()];
limit = lim1;
j = 0;
do {
@@ -144,46 +137,47 @@
X->im[kFftLengthBy2] * G.re[kFftLengthBy2];
}
- X = &X_buffer_data[0];
+ X = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
}
#endif
// Produces the filter output.
-void ApplyFilter(const RenderBuffer& X_buffer,
+void ApplyFilter(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S) {
S->re.fill(0.f);
S->im.fill(0.f);
- rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
- size_t index = X_buffer.Position();
+ rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
+ size_t index = render_buffer.Position();
for (auto& H_j : H) {
- const FftData& X = X_buffer_data[index];
+ const FftData& X = render_buffer_data[index];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
S->re[k] += X.re[k] * H_j.re[k] - X.im[k] * H_j.im[k];
S->im[k] += X.re[k] * H_j.im[k] + X.im[k] * H_j.re[k];
}
- index = index < (X_buffer_data.size() - 1) ? index + 1 : 0;
+ index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
}
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Produces the filter output (SSE2 variant).
-void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
+void ApplyFilter_SSE2(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S) {
+ RTC_DCHECK_GE(H.size(), H.size() - 1);
S->re.fill(0.f);
S->im.fill(0.f);
- rtc::ArrayView<const FftData> X_buffer_data = X_buffer.Buffer();
+ rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer();
const int lim1 =
- std::min(X_buffer_data.size() - X_buffer.Position(), H.size());
+ std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
const FftData* H_j = &H[0];
- const FftData* X = &X_buffer_data[X_buffer.Position()];
+ const FftData* X = &render_buffer_data[render_buffer.Position()];
int j = 0;
int limit = lim1;
@@ -209,11 +203,11 @@
}
}
limit = lim2;
- X = &X_buffer_data[0];
+ X = &render_buffer_data[0];
} while (j < lim2);
H_j = &H[0];
- X = &X_buffer_data[X_buffer.Position()];
+ X = &render_buffer_data[render_buffer.Position()];
j = 0;
limit = lim1;
do {
@@ -224,7 +218,7 @@
X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2];
}
limit = lim2;
- X = &X_buffer_data[0];
+ X = &render_buffer_data[0];
} while (j < lim2);
}
#endif
@@ -232,64 +226,61 @@
} // namespace aec3
AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions,
- bool use_filter_statistics,
Aec3Optimization optimization,
ApmDataDumper* data_dumper)
: data_dumper_(data_dumper),
fft_(),
optimization_(optimization),
- H_(size_partitions) {
+ H_(size_partitions),
+ H2_(size_partitions, std::array<float, kFftLengthBy2Plus1>()) {
RTC_DCHECK(data_dumper_);
- ResetFilter(H_);
- if (use_filter_statistics) {
- H2_.reset(new std::vector<std::array<float, kFftLengthBy2Plus1>>(
- size_partitions, std::array<float, kFftLengthBy2Plus1>()));
- for (auto H2_k : *H2_) {
- H2_k.fill(0.f);
- }
-
- erl_.reset(new std::array<float, kFftLengthBy2Plus1>());
- erl_->fill(0.f);
+ for (auto& H_j : H_) {
+ H_j.Clear();
}
+ for (auto& H2_k : H2_) {
+ H2_k.fill(0.f);
+ }
+ erl_.fill(0.f);
}
AdaptiveFirFilter::~AdaptiveFirFilter() = default;
void AdaptiveFirFilter::HandleEchoPathChange() {
- ResetFilter(H_);
- if (H2_) {
- for (auto H2_k : *H2_) {
- H2_k.fill(0.f);
- }
- RTC_DCHECK(erl_);
- erl_->fill(0.f);
+ for (auto& H_j : H_) {
+ H_j.Clear();
}
+ for (auto& H2_k : H2_) {
+ H2_k.fill(0.f);
+ }
+ erl_.fill(0.f);
}
-void AdaptiveFirFilter::Filter(const RenderBuffer& X_buffer, FftData* S) const {
+void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
+ FftData* S) const {
RTC_DCHECK(S);
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
- aec3::ApplyFilter_SSE2(X_buffer, H_, S);
+ aec3::ApplyFilter_SSE2(render_buffer, H_, S);
break;
#endif
default:
- aec3::ApplyFilter(X_buffer, H_, S);
+ aec3::ApplyFilter(render_buffer, H_, S);
}
}
-void AdaptiveFirFilter::Adapt(const RenderBuffer& X_buffer, const FftData& G) {
+void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
+ const FftData& G) {
// Adapt the filter.
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
- aec3::AdaptPartitions_SSE2(X_buffer, G, H_);
+ aec3::AdaptPartitions_SSE2(render_buffer, G, H_);
break;
#endif
default:
- aec3::AdaptPartitions(X_buffer, G, H_);
+ aec3::AdaptPartitions(render_buffer, G, H_);
}
// Constrain the filter partitions in a cyclic manner.
@@ -298,13 +289,9 @@
? partition_to_constrain_ + 1
: 0;
- // Optionally update the frequency response and echo return loss for the
- // filter.
- if (H2_) {
- RTC_DCHECK(erl_);
- UpdateFrequencyResponse(H_, H2_.get());
- UpdateErlEstimator(*H2_, erl_.get());
- }
+ // Update the frequency response and echo return loss for the filter.
+ UpdateFrequencyResponse(H_, &H2_);
+ UpdateErlEstimator(H2_, &erl_);
}
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter.h b/modules/audio_processing/aec3/adaptive_fir_filter.h
index a27fa6c..4fe10ea 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter.h
+++ b/modules/audio_processing/aec3/adaptive_fir_filter.h
@@ -26,21 +26,21 @@
namespace webrtc {
namespace aec3 {
// Adapts the filter partitions.
-void AdaptPartitions(const RenderBuffer& X_buffer,
+void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H);
#if defined(WEBRTC_ARCH_X86_FAMILY)
-void AdaptPartitions_SSE2(const RenderBuffer& X_buffer,
+void AdaptPartitions_SSE2(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H);
#endif
// Produces the filter output.
-void ApplyFilter(const RenderBuffer& X_buffer,
+void ApplyFilter(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S);
#if defined(WEBRTC_ARCH_X86_FAMILY)
-void ApplyFilter_SSE2(const RenderBuffer& X_buffer,
+void ApplyFilter_SSE2(const RenderBuffer& render_buffer,
rtc::ArrayView<const FftData> H,
FftData* S);
#endif
@@ -51,17 +51,16 @@
class AdaptiveFirFilter {
public:
AdaptiveFirFilter(size_t size_partitions,
- bool use_filter_statistics,
Aec3Optimization optimization,
ApmDataDumper* data_dumper);
~AdaptiveFirFilter();
// Produces the output of the filter.
- void Filter(const RenderBuffer& X_buffer, FftData* S) const;
+ void Filter(const RenderBuffer& render_buffer, FftData* S) const;
// Adapts the filter.
- void Adapt(const RenderBuffer& X_buffer, const FftData& G);
+ void Adapt(const RenderBuffer& render_buffer, const FftData& G);
// Receives reports that known echo path changes have occured and adjusts
// the filter adaptation accordingly.
@@ -70,25 +69,13 @@
// Returns the filter size.
size_t SizePartitions() const { return H_.size(); }
- // Returns the filter based echo return loss. This method can only be used if
- // the usage of filter statistics has been specified during the creation of
- // the adaptive filter.
- const std::array<float, kFftLengthBy2Plus1>& Erl() const {
- RTC_DCHECK(erl_) << "The filter must be created with use_filter_statistics "
- "set to true in order to be able to call retrieve the "
- "ERL.";
- return *erl_;
- }
+ // Returns the filter based echo return loss.
+ const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; }
- // Returns the frequency responses for the filter partitions. This method can
- // only be used if the usage of filter statistics has been specified during
- // the creation of the adaptive filter.
+ // Returns the frequency responses for the filter partitions.
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
FilterFrequencyResponse() const {
- RTC_DCHECK(H2_) << "The filter must be created with use_filter_statistics "
- "set to true in order to be able to call retrieve the "
- "filter frequency responde.";
- return *H2_;
+ return H2_;
}
void DumpFilter(const char* name) {
@@ -103,8 +90,8 @@
const Aec3Fft fft_;
const Aec3Optimization optimization_;
std::vector<FftData> H_;
- std::unique_ptr<std::vector<std::array<float, kFftLengthBy2Plus1>>> H2_;
- std::unique_ptr<std::array<float, kFftLengthBy2Plus1>> erl_;
+ std::vector<std::array<float, kFftLengthBy2Plus1>> H2_;
+ std::array<float, kFftLengthBy2Plus1> erl_;
size_t partition_to_constrain_ = 0;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AdaptiveFirFilter);
diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index c9dd864..85d9769 100644
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h"
-// TODO(peah): Reactivate once the next CL has landed.
-#if 0
-
#include <algorithm>
#include <numeric>
#include <string>
@@ -22,8 +19,9 @@
#endif
#include "webrtc/base/arraysize.h"
#include "webrtc/base/random.h"
-#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
+#include "webrtc/modules/audio_processing/aec3/aec_state.h"
+#include "webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.h"
#include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h"
#include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
@@ -49,12 +47,10 @@
TEST(AdaptiveFirFilter, TestOptimizations) {
bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0);
if (use_sse2) {
- FftBuffer X_buffer(Aec3Optimization::kNone, 12, std::vector<size_t>(1, 12));
- std::array<float, kBlockSize> x_old;
- x_old.fill(0.f);
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 12,
+ std::vector<size_t>(1, 12));
Random random_generator(42U);
- std::vector<float> x(kBlockSize, 0.f);
- FftData X;
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
FftData S_C;
FftData S_SSE2;
FftData G;
@@ -69,12 +65,11 @@
}
for (size_t k = 0; k < 500; ++k) {
- RandomizeSampleVector(&random_generator, x);
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
+ RandomizeSampleVector(&random_generator, x[0]);
+ render_buffer.Insert(x);
- ApplyFilter_SSE2(X_buffer, H_SSE2, &S_SSE2);
- ApplyFilter(X_buffer, H_C, &S_C);
+ ApplyFilter_SSE2(render_buffer, H_SSE2, &S_SSE2);
+ ApplyFilter(render_buffer, H_C, &S_C);
for (size_t j = 0; j < S_C.re.size(); ++j) {
EXPECT_FLOAT_EQ(S_C.re[j], S_SSE2.re[j]);
EXPECT_FLOAT_EQ(S_C.im[j], S_SSE2.im[j]);
@@ -85,8 +80,8 @@
std::for_each(G.im.begin(), G.im.end(),
[&](float& a) { a = random_generator.Rand<float>(); });
- AdaptPartitions_SSE2(X_buffer, G, H_SSE2);
- AdaptPartitions(X_buffer, G, H_C);
+ AdaptPartitions_SSE2(render_buffer, G, H_SSE2);
+ AdaptPartitions(render_buffer, G, H_C);
for (size_t k = 0; k < H_C.size(); ++k) {
for (size_t j = 0; j < H_C[k].re.size(); ++j) {
@@ -103,32 +98,17 @@
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Verifies that the check for non-null data dumper works.
TEST(AdaptiveFirFilter, NullDataDumper) {
- EXPECT_DEATH(AdaptiveFirFilter(9, true, DetectOptimization(), nullptr), "");
+ EXPECT_DEATH(AdaptiveFirFilter(9, DetectOptimization(), nullptr), "");
}
// Verifies that the check for non-null filter output works.
TEST(AdaptiveFirFilter, NullFilterOutput) {
ApmDataDumper data_dumper(42);
- AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
- FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(),
- std::vector<size_t>(1, filter.SizePartitions()));
- EXPECT_DEATH(filter.Filter(X_buffer, nullptr), "");
-}
-
-// Verifies that the check for whether filter statistics are being generated
-// works when retrieving the ERL.
-TEST(AdaptiveFirFilter, ErlAccessWhenNoFilterStatistics) {
- ApmDataDumper data_dumper(42);
- AdaptiveFirFilter filter(9, false, DetectOptimization(), &data_dumper);
- EXPECT_DEATH(filter.Erl(), "");
-}
-
-// Verifies that the check for whether filter statistics are being generated
-// works when retrieving the filter frequencyResponse.
-TEST(AdaptiveFirFilter, FilterFrequencyResponseAccessWhenNoFilterStatistics) {
- ApmDataDumper data_dumper(42);
- AdaptiveFirFilter filter(9, false, DetectOptimization(), &data_dumper);
- EXPECT_DEATH(filter.FilterFrequencyResponse(), "");
+ AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
+ filter.SizePartitions(),
+ std::vector<size_t>(1, filter.SizePartitions()));
+ EXPECT_DEATH(filter.Filter(render_buffer, nullptr), "");
}
#endif
@@ -137,7 +117,7 @@
// are turned on.
TEST(AdaptiveFirFilter, FilterStatisticsAccess) {
ApmDataDumper data_dumper(42);
- AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
+ AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
filter.Erl();
filter.FilterFrequencyResponse();
}
@@ -146,8 +126,7 @@
TEST(AdaptiveFirFilter, FilterSize) {
ApmDataDumper data_dumper(42);
for (size_t filter_size = 1; filter_size < 5; ++filter_size) {
- AdaptiveFirFilter filter(filter_size, false, DetectOptimization(),
- &data_dumper);
+ AdaptiveFirFilter filter(filter_size, DetectOptimization(), &data_dumper);
EXPECT_EQ(filter_size, filter.SizePartitions());
}
}
@@ -157,19 +136,18 @@
TEST(AdaptiveFirFilter, FilterAndAdapt) {
constexpr size_t kNumBlocksToProcess = 500;
ApmDataDumper data_dumper(42);
- AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
+ AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
Aec3Fft fft;
- FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(),
- std::vector<size_t>(1, filter.SizePartitions()));
- std::array<float, kBlockSize> x_old;
- x_old.fill(0.f);
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
+ filter.SizePartitions(),
+ std::vector<size_t>(1, filter.SizePartitions()));
ShadowFilterUpdateGain gain;
Random random_generator(42U);
- std::vector<float> x(kBlockSize, 0.f);
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
+ std::vector<float> n(kBlockSize, 0.f);
std::vector<float> y(kBlockSize, 0.f);
AecState aec_state;
RenderSignalAnalyzer render_signal_analyzer;
- FftData X;
std::vector<float> e(kBlockSize, 0.f);
std::array<float, kFftLength> s;
FftData S;
@@ -178,6 +156,10 @@
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
+ // [B,A] = butter(2,100/8000,'high')
+ constexpr CascadedBiQuadFilter::BiQuadCoefficients
+ kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f},
+ {-1.94448f, 0.94598f}};
Y2.fill(0.f);
E2_main.fill(0.f);
E2_shadow.fill(0.f);
@@ -186,16 +168,27 @@
for (size_t delay_samples : {0, 64, 150, 200, 301}) {
DelayBuffer<float> delay_buffer(delay_samples);
+ CascadedBiQuadFilter x_hp_filter(kHighPassFilterCoefficients, 1);
+ CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1);
+
SCOPED_TRACE(ProduceDebugText(delay_samples));
for (size_t k = 0; k < kNumBlocksToProcess; ++k) {
- RandomizeSampleVector(&random_generator, x);
- delay_buffer.Delay(x, y);
+ RandomizeSampleVector(&random_generator, x[0]);
+ delay_buffer.Delay(x[0], y);
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
- render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay());
+ RandomizeSampleVector(&random_generator, n);
+ constexpr float kNoiseScaling = 1.f / 100.f;
+ std::transform(
+ y.begin(), y.end(), n.begin(), y.begin(),
+ [kNoiseScaling](float a, float b) { return a + b * kNoiseScaling; });
- filter.Filter(X_buffer, &S);
+ x_hp_filter.Process(x[0]);
+ y_hp_filter.Process(y);
+
+ render_buffer.Insert(x);
+ render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
+
+ filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e.begin(),
[&](float a, float b) { return a - b * kScale; });
@@ -204,12 +197,13 @@
});
fft.ZeroPaddedFft(e, &E);
- gain.Compute(X_buffer, render_signal_analyzer, E, filter.SizePartitions(),
- false, &G);
- filter.Adapt(X_buffer, G);
+ gain.Compute(render_buffer, render_signal_analyzer, E,
+ filter.SizePartitions(), false, &G);
+ filter.Adapt(render_buffer, G);
+ aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(filter.FilterFrequencyResponse(),
- rtc::Optional<size_t>(), X_buffer, E2_main, E2_shadow,
- Y2, x, EchoPathVariability(false, false), false);
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
+ x[0], false);
}
// Verify that the filter is able to perform well.
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
@@ -220,5 +214,3 @@
}
} // namespace aec3
} // namespace webrtc
-
-#endif
diff --git a/modules/audio_processing/aec3/aec3_common.h b/modules/audio_processing/aec3/aec3_common.h
index 480f12c..ef7dcdf 100644
--- a/modules/audio_processing/aec3/aec3_common.h
+++ b/modules/audio_processing/aec3/aec3_common.h
@@ -26,12 +26,15 @@
enum class Aec3Optimization { kNone, kSse2 };
-constexpr int kMetricsReportingIntervalBlocks = 10 * 250;
+constexpr int kNumBlocksPerSecond = 250;
+
+constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
constexpr int kMetricsComputationBlocks = 9;
constexpr int kMetricsCollectionBlocks =
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
constexpr int kAdaptiveFilterLength = 12;
+constexpr int kResidualEchoPowerRenderWindowSize = 30;
constexpr size_t kFftLengthBy2 = 64;
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
@@ -55,11 +58,15 @@
kMatchedFilterWindowSizeSubBlocks +
1);
+constexpr float kFixedEchoPathGain = 100;
+
constexpr size_t kRenderDelayBufferSize =
(3 * kDownsampledRenderBufferSize) / (4 * kSubBlockSize);
constexpr size_t kMaxApiCallsJitterBlocks = 10;
constexpr size_t kRenderTransferQueueSize = kMaxApiCallsJitterBlocks / 2;
+static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks,
+ "Requirement to ensure buffer overflow detection");
constexpr size_t NumBandsForRate(int sample_rate_hz) {
return static_cast<size_t>(sample_rate_hz == 8000 ? 1
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index d2c0bdd..8e92f5f 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -14,6 +14,7 @@
#include <numeric>
#include <vector>
+#include "webrtc/base/array_view.h"
#include "webrtc/base/atomicops.h"
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
@@ -21,23 +22,23 @@
namespace webrtc {
namespace {
-constexpr float kMaxFilterEstimateStrength = 1000.f;
+constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond;
+constexpr size_t kSaturationLeakageBlocks = 20;
-// Compute the delay of the adaptive filter as the partition with a distinct
-// peak.
-void AnalyzeFilter(
+// Computes delay of the adaptive filter.
+rtc::Optional<size_t> EstimateFilterDelay(
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
- filter_frequency_response,
- std::array<bool, kFftLengthBy2Plus1>* bands_with_reliable_filter,
- std::array<float, kFftLengthBy2Plus1>* filter_estimate_strength,
- rtc::Optional<size_t>* filter_delay) {
- const auto& H2 = filter_frequency_response;
+ adaptive_filter_frequency_response) {
+ const auto& H2 = adaptive_filter_frequency_response;
size_t reliable_delays_sum = 0;
size_t num_reliable_delays = 0;
constexpr size_t kUpperBin = kFftLengthBy2 - 5;
+ constexpr float kMinPeakMargin = 10.f;
+ const size_t kTailPartition = H2.size() - 1;
for (size_t k = 1; k < kUpperBin; ++k) {
+ // Find the maximum of H2[j].
int peak = 0;
for (size_t j = 0; j < H2.size(); ++j) {
if (H2[j][k] > H2[peak][k]) {
@@ -45,43 +46,33 @@
}
}
- if (H2[peak][k] == 0.f) {
- (*filter_estimate_strength)[k] = 0.f;
- } else if (H2[H2.size() - 1][k] == 0.f) {
- (*filter_estimate_strength)[k] = kMaxFilterEstimateStrength;
- } else {
- (*filter_estimate_strength)[k] = std::min(
- kMaxFilterEstimateStrength, H2[peak][k] / H2[H2.size() - 1][k]);
- }
-
- constexpr float kMargin = 10.f;
- if (kMargin * H2[H2.size() - 1][k] < H2[peak][k]) {
- (*bands_with_reliable_filter)[k] = true;
+ // Count the peak as a delay only if the peak is sufficiently larger than
+ // the tail.
+ if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) {
reliable_delays_sum += peak;
++num_reliable_delays;
- } else {
- (*bands_with_reliable_filter)[k] = false;
}
}
- (*bands_with_reliable_filter)[0] = (*bands_with_reliable_filter)[1];
- std::fill(bands_with_reliable_filter->begin() + kUpperBin,
- bands_with_reliable_filter->end(),
- (*bands_with_reliable_filter)[kUpperBin - 1]);
- (*filter_estimate_strength)[0] = (*filter_estimate_strength)[1];
- std::fill(filter_estimate_strength->begin() + kUpperBin,
- filter_estimate_strength->end(),
- (*filter_estimate_strength)[kUpperBin - 1]);
- *filter_delay =
- num_reliable_delays > 20
- ? rtc::Optional<size_t>(reliable_delays_sum / num_reliable_delays)
- : rtc::Optional<size_t>();
+ // Return no delay if not sufficient delays have been found.
+ if (num_reliable_delays < 21) {
+ return rtc::Optional<size_t>();
+ }
+
+ const size_t delay = reliable_delays_sum / num_reliable_delays;
+ // Sanity check that the peak is not caused by a false strong DC-component in
+ // the filter.
+ for (size_t k = 1; k < kUpperBin; ++k) {
+ if (H2[delay][k] > H2[delay][0]) {
+ RTC_DCHECK_GT(H2.size(), delay);
+ return rtc::Optional<size_t>(delay);
+ }
+ }
+ return rtc::Optional<size_t>();
}
-constexpr int kActiveRenderCounterInitial = 50;
-constexpr int kActiveRenderCounterMax = 200;
-constexpr int kEchoPathChangeCounterInitial = 50;
-constexpr int kEchoPathChangeCounterMax = 3 * 250;
+constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;
+constexpr int kEchoPathChangeCounterMax = 3 * kNumBlocksPerSecond;
} // namespace
@@ -90,76 +81,80 @@
AecState::AecState()
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
- echo_path_change_counter_(kEchoPathChangeCounterInitial),
- active_render_counter_(kActiveRenderCounterInitial) {
- bands_with_reliable_filter_.fill(false);
- filter_estimate_strength_.fill(0.f);
-}
+ echo_path_change_counter_(kEchoPathChangeCounterInitial) {}
AecState::~AecState() = default;
+void AecState::HandleEchoPathChange(
+ const EchoPathVariability& echo_path_variability) {
+ if (echo_path_variability.AudioPathChanged()) {
+ blocks_since_last_saturation_ = 0;
+ active_render_blocks_ = 0;
+ echo_path_change_counter_ = kEchoPathChangeCounterMax;
+ usable_linear_estimate_ = false;
+ echo_leakage_detected_ = false;
+ capture_signal_saturation_ = false;
+ echo_saturation_ = false;
+ headset_detected_ = false;
+ previous_max_sample_ = 0.f;
+ }
+}
+
void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
- filter_frequency_response,
+ adaptive_filter_frequency_response,
const rtc::Optional<size_t>& external_delay_samples,
- const RenderBuffer& X_buffer,
+ const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
- const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
const std::array<float, kFftLengthBy2Plus1>& Y2,
rtc::ArrayView<const float> x,
- const EchoPathVariability& echo_path_variability,
bool echo_leakage_detected) {
- filter_length_ = filter_frequency_response.size();
- AnalyzeFilter(filter_frequency_response, &bands_with_reliable_filter_,
- &filter_estimate_strength_, &filter_delay_);
- // Compute the externally provided delay in partitions. The truncation is
- // intended here.
+ // Store input parameters.
+ echo_leakage_detected_ = echo_leakage_detected;
+
+ // Update counters.
+ const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+ const bool active_render_block = x_energy > 10000.f * kFftLengthBy2;
+ active_render_blocks_ += active_render_block ? 1 : 0;
+ --echo_path_change_counter_;
+
+ // Estimate delays.
+ filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
external_delay_ =
external_delay_samples
? rtc::Optional<size_t>(*external_delay_samples / kBlockSize)
: rtc::Optional<size_t>();
- const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
-
- active_render_blocks_ =
- echo_path_variability.AudioPathChanged() ? 0 : active_render_blocks_ + 1;
-
- echo_path_change_counter_ = echo_path_variability.AudioPathChanged()
- ? kEchoPathChangeCounterMax
- : echo_path_change_counter_ - 1;
- active_render_counter_ = x_energy > 10000.f * kFftLengthBy2
- ? kActiveRenderCounterMax
- : active_render_counter_ - 1;
-
- usable_linear_estimate_ = filter_delay_ && echo_path_change_counter_ <= 0;
-
- echo_leakage_detected_ = echo_leakage_detected;
-
- model_based_aec_feasible_ = usable_linear_estimate_ || external_delay_;
-
- if (usable_linear_estimate_) {
- const auto& X2 = X_buffer.Spectrum(*filter_delay_);
-
- // TODO(peah): Expose these as stats.
+ // Update the ERL and ERLE measures.
+ if (filter_delay_ && echo_path_change_counter_ <= 0) {
+ const auto& X2 = render_buffer.Spectrum(*filter_delay_);
erle_estimator_.Update(X2, Y2, E2_main);
erl_estimator_.Update(X2, Y2);
-
-// TODO(peah): Add working functionality for headset detection. Until the
-// functionality for that is working the headset detector is hardcoded to detect
-// no headset.
-#if 0
- const auto& erl = erl_estimator_.Erl();
- const int low_erl_band_count = std::count_if(
- erl.begin(), erl.end(), [](float a) { return a <= 0.1f; });
-
- const int noisy_band_count = std::count_if(
- filter_estimate_strength_.begin(), filter_estimate_strength_.end(),
- [](float a) { return a <= 10.f; });
- headset_detected_ = low_erl_band_count > 20 && noisy_band_count > 20;
-#endif
- headset_detected_ = false;
- } else {
- headset_detected_ = false;
}
+
+ // Detect and flag echo saturation.
+ RTC_DCHECK_LT(0, x.size());
+ const float max_sample = fabs(*std::max_element(
+ x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
+ const bool saturated_echo =
+ previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture();
+ previous_max_sample_ = max_sample;
+
+ // Counts the blocks since saturation.
+ blocks_since_last_saturation_ =
+ saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
+ echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+
+ // Flag whether the linear filter estimate is usable.
+ usable_linear_estimate_ =
+ (!echo_saturation_) &&
+ active_render_blocks_ > kEchoPathChangeConvergenceBlocks &&
+ filter_delay_ && echo_path_change_counter_ <= 0;
+
+ // After an amount of active render samples for which an echo should have been
+ // detected in the capture signal if the ERL was not infinite, flag that a
+ // headset is used.
+ headset_detected_ = !external_delay_ && !filter_delay_ &&
+ active_render_blocks_ >= kEchoPathChangeConvergenceBlocks;
}
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index 32e07ee..7905be0 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -40,16 +40,8 @@
// Returns whether there has been echo leakage detected.
bool EchoLeakageDetected() const { return echo_leakage_detected_; }
- // Returns whether it is possible at all to use the model based echo removal
- // functionalities.
- bool ModelBasedAecFeasible() const { return model_based_aec_feasible_; }
-
// Returns whether the render signal is currently active.
- bool ActiveRender() const { return active_render_counter_ > 0; }
-
- // Returns whether the number of active render blocks since an echo path
- // change.
- size_t ActiveRenderBlocks() const { return active_render_blocks_; }
+ bool ActiveRender() const { return active_render_blocks_ > 200; }
// Returns the ERLE.
const std::array<float, kFftLengthBy2Plus1>& Erle() const {
@@ -67,24 +59,12 @@
// Returns the externally provided delay.
rtc::Optional<size_t> ExternalDelay() const { return external_delay_; }
- // Returns the bands where the linear filter is reliable.
- const std::array<bool, kFftLengthBy2Plus1>& BandsWithReliableFilter() const {
- return bands_with_reliable_filter_;
- }
-
- // Reports whether the filter is poorly aligned.
- bool PoorlyAlignedFilter() const {
- return FilterDelay() ? *FilterDelay() > 0.75f * filter_length_ : false;
- }
-
- // Returns the strength of the filter.
- const std::array<float, kFftLengthBy2Plus1>& FilterEstimateStrength() const {
- return filter_estimate_strength_;
- }
-
// Returns whether the capture signal is saturated.
bool SaturatedCapture() const { return capture_signal_saturation_; }
+ // Returns whether the echo signal is saturated.
+ bool SaturatedEcho() const { return echo_saturation_; }
+
// Updates the capture signal saturation.
void UpdateCaptureSaturation(bool capture_signal_saturation) {
capture_signal_saturation_ = capture_signal_saturation;
@@ -93,16 +73,17 @@
// Returns whether a probable headset setup has been detected.
bool HeadsetDetected() const { return headset_detected_; }
+ // Takes appropriate action at an echo path change.
+ void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
+
// Updates the aec state.
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
- filter_frequency_response,
+ adaptive_filter_frequency_response,
const rtc::Optional<size_t>& external_delay_samples,
- const RenderBuffer& X_buffer,
+ const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
- const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
const std::array<float, kFftLengthBy2Plus1>& Y2,
rtc::ArrayView<const float> x,
- const EchoPathVariability& echo_path_variability,
bool echo_leakage_detected);
private:
@@ -111,18 +92,16 @@
ErlEstimator erl_estimator_;
ErleEstimator erle_estimator_;
int echo_path_change_counter_;
- int active_render_counter_;
size_t active_render_blocks_ = 0;
bool usable_linear_estimate_ = false;
bool echo_leakage_detected_ = false;
- bool model_based_aec_feasible_ = false;
bool capture_signal_saturation_ = false;
+ bool echo_saturation_ = false;
bool headset_detected_ = false;
+ float previous_max_sample_ = 0.f;
rtc::Optional<size_t> filter_delay_;
rtc::Optional<size_t> external_delay_;
- std::array<bool, kFftLengthBy2Plus1> bands_with_reliable_filter_;
- std::array<float, kFftLengthBy2Plus1> filter_estimate_strength_;
- size_t filter_length_;
+ size_t blocks_since_last_saturation_ = 1000;
RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
};
diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc
index 312d451..a3aa4c1 100644
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
-// TODO(peah): Reactivate once the next CL has landed.
-#if 0
-
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
#include "webrtc/test/gtest.h"
@@ -22,13 +19,12 @@
TEST(AecState, NormalUsage) {
ApmDataDumper data_dumper(42);
AecState state;
- FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
- std::array<float, kFftLengthBy2Plus1> E2_main;
- std::array<float, kFftLengthBy2Plus1> E2_shadow;
- std::array<float, kFftLengthBy2Plus1> Y2;
- std::array<float, kBlockSize> x;
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
+ std::vector<size_t>(1, 30));
+ std::array<float, kFftLengthBy2Plus1> E2_main = {};
+ std::array<float, kFftLengthBy2Plus1> Y2 = {};
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
EchoPathVariability echo_path_variability(false, false);
- x.fill(0.f);
std::vector<std::array<float, kFftLengthBy2Plus1>>
converged_filter_frequency_response(10);
@@ -38,165 +34,116 @@
std::vector<std::array<float, kFftLengthBy2Plus1>>
diverged_filter_frequency_response = converged_filter_frequency_response;
converged_filter_frequency_response[2].fill(100.f);
+ converged_filter_frequency_response[2][0] = 1.f;
- // Verify that model based aec feasibility and linear AEC usability are false
- // when the filter is diverged and there is no external delay reported.
+ // Verify that linear AEC usability is false when the filter is diverged and
+ // there is no external delay reported.
state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
- EXPECT_FALSE(state.ModelBasedAecFeasible());
- EXPECT_FALSE(state.UsableLinearEstimate());
-
- // Verify that model based aec feasibility is true and that linear AEC
- // usability is false when the filter is diverged and there is an external
- // delay reported.
- state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
- EXPECT_FALSE(state.ModelBasedAecFeasible());
- for (int k = 0; k < 50; ++k) {
- state.Update(diverged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
- }
- EXPECT_TRUE(state.ModelBasedAecFeasible());
+ render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that linear AEC usability is true when the filter is converged
- for (int k = 0; k < 50; ++k) {
+ std::fill(x[0].begin(), x[0].end(), 101.f);
+ for (int k = 0; k < 3000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
+ render_buffer, E2_main, Y2, x[0], false);
}
EXPECT_TRUE(state.UsableLinearEstimate());
// Verify that linear AEC usability becomes false after an echo path change is
// reported
- echo_path_variability = EchoPathVariability(true, false);
+ state.HandleEchoPathChange(EchoPathVariability(true, false));
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
+ render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.UsableLinearEstimate());
// Verify that the active render detection works as intended.
- x.fill(101.f);
+ std::fill(x[0].begin(), x[0].end(), 101.f);
+ state.HandleEchoPathChange(EchoPathVariability(true, true));
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
- EXPECT_TRUE(state.ActiveRender());
-
- x.fill(0.f);
- for (int k = 0; k < 200; ++k) {
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
- }
+ render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.ActiveRender());
- x.fill(101.f);
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
+ for (int k = 0; k < 1000; ++k) {
+ state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
+ render_buffer, E2_main, Y2, x[0], false);
+ }
EXPECT_TRUE(state.ActiveRender());
// Verify that echo leakage is properly reported.
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
+ render_buffer, E2_main, Y2, x[0], false);
EXPECT_FALSE(state.EchoLeakageDetected());
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- true);
+ render_buffer, E2_main, Y2, x[0], true);
EXPECT_TRUE(state.EchoLeakageDetected());
- // Verify that the bands containing reliable filter estimates are properly
- // reported.
- echo_path_variability = EchoPathVariability(false, false);
- for (int k = 0; k < 200; ++k) {
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
- }
-
- FftData X;
- X.re.fill(10000.f);
- X.im.fill(0.f);
- for (size_t k = 0; k < X_buffer.Buffer().size(); ++k) {
- X_buffer.Insert(X);
- }
-
- Y2.fill(10.f * 1000.f * 1000.f);
- E2_main.fill(100.f * Y2[0]);
- E2_shadow.fill(100.f * Y2[0]);
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
-
- E2_main.fill(0.1f * Y2[0]);
- E2_shadow.fill(E2_main[0]);
- for (size_t k = 0; k < Y2.size(); k += 2) {
- E2_main[k] = Y2[k];
- E2_shadow[k] = Y2[k];
- }
- state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
-
- const std::array<bool, kFftLengthBy2Plus1>& reliable_bands =
- state.BandsWithReliableFilter();
-
- EXPECT_EQ(reliable_bands[0], reliable_bands[1]);
- for (size_t k = 1; k < kFftLengthBy2 - 5; ++k) {
- EXPECT_TRUE(reliable_bands[k]);
- }
- for (size_t k = kFftLengthBy2 - 5; k < reliable_bands.size(); ++k) {
- EXPECT_EQ(reliable_bands[kFftLengthBy2 - 6], reliable_bands[k]);
- }
-
// Verify that the ERL is properly estimated
- Y2.fill(10.f * X.re[0] * X.re[0]);
- for (size_t k = 0; k < 100000; ++k) {
+ for (auto& x_k : x) {
+ x_k = std::vector<float>(kBlockSize, 0.f);
+ }
+
+ x[0][0] = 5000.f;
+ for (size_t k = 0; k < render_buffer.Buffer().size(); ++k) {
+ render_buffer.Insert(x);
+ }
+
+ Y2.fill(10.f * 10000.f * 10000.f);
+ for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
+ render_buffer, E2_main, Y2, x[0], false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
const std::array<float, kFftLengthBy2Plus1>& erl = state.Erl();
- std::for_each(erl.begin(), erl.end(),
- [](float a) { EXPECT_NEAR(10.f, a, 0.1); });
+ EXPECT_EQ(erl[0], erl[1]);
+ for (size_t k = 1; k < erl.size() - 1; ++k) {
+ EXPECT_NEAR(k % 2 == 0 ? 10.f : 1000.f, erl[k], 0.1);
+ }
+ EXPECT_EQ(erl[erl.size() - 2], erl[erl.size() - 1]);
// Verify that the ERLE is properly estimated
- E2_main.fill(1.f * X.re[0] * X.re[0]);
+ E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(10.f * E2_main[0]);
- for (size_t k = 0; k < 10000; ++k) {
+ for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
+ render_buffer, E2_main, Y2, x[0], false);
}
ASSERT_TRUE(state.UsableLinearEstimate());
- std::for_each(state.Erle().begin(), state.Erle().end(),
- [](float a) { EXPECT_NEAR(8.f, a, 0.1); });
+ {
+ const auto& erle = state.Erle();
+ EXPECT_EQ(erle[0], erle[1]);
+ for (size_t k = 1; k < erle.size() - 1; ++k) {
+ EXPECT_NEAR(k % 2 == 0 ? 8.f : 1.f, erle[k], 0.1);
+ }
+ EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]);
+ }
- E2_main.fill(1.f * X.re[0] * X.re[0]);
+ E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(5.f * E2_main[0]);
- for (size_t k = 0; k < 10000; ++k) {
+ for (size_t k = 0; k < 1000; ++k) {
state.Update(converged_filter_frequency_response, rtc::Optional<size_t>(2),
- X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability,
- false);
+ render_buffer, E2_main, Y2, x[0], false);
}
+
ASSERT_TRUE(state.UsableLinearEstimate());
- std::for_each(state.Erle().begin(), state.Erle().end(),
- [](float a) { EXPECT_NEAR(5.f, a, 0.1); });
+ {
+ const auto& erle = state.Erle();
+ EXPECT_EQ(erle[0], erle[1]);
+ for (size_t k = 1; k < erle.size() - 1; ++k) {
+ EXPECT_NEAR(k % 2 == 0 ? 5.f : 1.f, erle[k], 0.1);
+ }
+ EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]);
+ }
}
// Verifies the a non-significant delay is correctly identified.
TEST(AecState, NonSignificantDelay) {
AecState state;
- FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
+ std::vector<size_t>(1, 30));
std::array<float, kFftLengthBy2Plus1> E2_main;
- std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
EchoPathVariability echo_path_variability(false, false);
@@ -208,8 +155,9 @@
}
// Verify that a non-significant filter delay is identified correctly.
- state.Update(frequency_response, rtc::Optional<size_t>(), X_buffer, E2_main,
- E2_shadow, Y2, x, echo_path_variability, false);
+ state.HandleEchoPathChange(echo_path_variability);
+ state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
+ E2_main, Y2, x, false);
EXPECT_FALSE(state.FilterDelay());
}
@@ -217,9 +165,9 @@
TEST(AecState, ConvergedFilterDelay) {
constexpr int kFilterLength = 10;
AecState state;
- FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
+ std::vector<size_t>(1, 30));
std::array<float, kFftLengthBy2Plus1> E2_main;
- std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kBlockSize> x;
EchoPathVariability echo_path_variability(false, false);
@@ -234,9 +182,10 @@
v.fill(0.01f);
}
frequency_response[k].fill(100.f);
-
- state.Update(frequency_response, rtc::Optional<size_t>(), X_buffer, E2_main,
- E2_shadow, Y2, x, echo_path_variability, false);
+ frequency_response[k][0] = 0.f;
+ state.HandleEchoPathChange(echo_path_variability);
+ state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
+ E2_main, Y2, x, false);
EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
if (k != (kFilterLength - 1)) {
EXPECT_EQ(k, state.FilterDelay());
@@ -255,27 +204,27 @@
E2_shadow.fill(0.f);
Y2.fill(0.f);
x.fill(0.f);
- FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector<size_t>(1, 30));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
+ std::vector<size_t>(1, 30));
std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30);
for (auto& v : frequency_response) {
v.fill(0.01f);
}
for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
+ state.HandleEchoPathChange(EchoPathVariability(false, false));
state.Update(frequency_response, rtc::Optional<size_t>(k * kBlockSize + 5),
- X_buffer, E2_main, E2_shadow, Y2, x,
- EchoPathVariability(false, false), false);
+ render_buffer, E2_main, Y2, x, false);
EXPECT_TRUE(state.ExternalDelay());
EXPECT_EQ(k, state.ExternalDelay());
}
// Verify that the externally reported delay is properly unset when it is no
// longer present.
- state.Update(frequency_response, rtc::Optional<size_t>(), X_buffer, E2_main,
- E2_shadow, Y2, x, EchoPathVariability(false, false), false);
+ state.HandleEchoPathChange(EchoPathVariability(false, false));
+ state.Update(frequency_response, rtc::Optional<size_t>(), render_buffer,
+ E2_main, Y2, x, false);
EXPECT_FALSE(state.ExternalDelay());
}
} // namespace webrtc
-
-#endif
diff --git a/modules/audio_processing/aec3/comfort_noise_generator.cc b/modules/audio_processing/aec3/comfort_noise_generator.cc
index f630b25..b8d7f28 100644
--- a/modules/audio_processing/aec3/comfort_noise_generator.cc
+++ b/modules/audio_processing/aec3/comfort_noise_generator.cc
@@ -188,6 +188,17 @@
}
}
+ // Limit the noise to a floor of -96 dBFS.
+ constexpr float kNoiseFloor = 440.f;
+ for (auto& n : N2_) {
+ n = std::max(n, kNoiseFloor);
+ }
+ if (N2_initial_) {
+ for (auto& n : *N2_initial_) {
+ n = std::max(n, kNoiseFloor);
+ }
+ }
+
// Choose N2 estimate to use.
const std::array<float, kFftLengthBy2Plus1>& N2 =
N2_initial_ ? *N2_initial_ : N2_;
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 71e4526..2b28a21 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -9,6 +9,7 @@
*/
#include "webrtc/modules/audio_processing/aec3/echo_remover.h"
+#include <math.h>
#include <algorithm>
#include <memory>
#include <numeric>
@@ -24,7 +25,6 @@
#include "webrtc/modules/audio_processing/aec3/echo_remover_metrics.h"
#include "webrtc/modules/audio_processing/aec3/fft_data.h"
#include "webrtc/modules/audio_processing/aec3/output_selector.h"
-#include "webrtc/modules/audio_processing/aec3/power_echo_model.h"
#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
#include "webrtc/modules/audio_processing/aec3/render_delay_buffer.h"
#include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"
@@ -46,11 +46,6 @@
}
}
-float BlockPower(const std::array<float, kBlockSize> x) {
- return std::accumulate(x.begin(), x.end(), 0.f,
- [](float a, float b) -> float { return a + b * b; });
-}
-
// Class for removing the echo from the capture signal.
class EchoRemoverImpl final : public EchoRemover {
public:
@@ -83,8 +78,6 @@
SuppressionGain suppression_gain_;
ComfortNoiseGenerator cng_;
SuppressionFilter suppression_filter_;
- PowerEchoModel power_echo_model_;
- RenderBuffer X_buffer_;
RenderSignalAnalyzer render_signal_analyzer_;
OutputSelector output_selector_;
ResidualEchoEstimator residual_echo_estimator_;
@@ -106,12 +99,7 @@
subtractor_(data_dumper_.get(), optimization_),
suppression_gain_(optimization_),
cng_(optimization_),
- suppression_filter_(sample_rate_hz_),
- X_buffer_(optimization_,
- NumBandsForRate(sample_rate_hz_),
- std::max(subtractor_.MinFarendBufferLength(),
- power_echo_model_.MinFarendBufferLength()),
- subtractor_.NumBlocksInRenderSums()) {
+ suppression_filter_(sample_rate_hz_) {
RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
}
@@ -134,23 +122,23 @@
const std::vector<float>& x0 = x[0];
std::vector<float>& y0 = (*y)[0];
- data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize, &y0[0],
+ data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize, &y0[0],
LowestBandRate(sample_rate_hz_), 1);
- data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize, &x0[0],
+ data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0],
LowestBandRate(sample_rate_hz_), 1);
aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
if (echo_path_variability.AudioPathChanged()) {
subtractor_.HandleEchoPathChange(echo_path_variability);
- residual_echo_estimator_.HandleEchoPathChange(echo_path_variability);
+ aec_state_.HandleEchoPathChange(echo_path_variability);
}
std::array<float, kFftLengthBy2Plus1> Y2;
- std::array<float, kFftLengthBy2Plus1> S2_power;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> S2_linear;
std::array<float, kFftLengthBy2Plus1> G;
+ float high_bands_gain;
FftData Y;
FftData comfort_noise;
FftData high_band_comfort_noise;
@@ -159,14 +147,13 @@
auto& E2_main = subtractor_output.E2_main;
auto& E2_shadow = subtractor_output.E2_shadow;
auto& e_main = subtractor_output.e_main;
- auto& e_shadow = subtractor_output.e_shadow;
// Analyze the render signal.
render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay());
// Perform linear echo cancellation.
- subtractor_.Process(render_buffer, y0, render_signal_analyzer_,
- aec_state_.SaturatedCapture(), &subtractor_output);
+ subtractor_.Process(render_buffer, y0, render_signal_analyzer_, aec_state_,
+ &subtractor_output);
// Compute spectra.
fft_.ZeroPaddedFft(y0, &Y);
@@ -175,36 +162,29 @@
// Update the AEC state information.
aec_state_.Update(subtractor_.FilterFrequencyResponse(),
- echo_path_delay_samples, render_buffer, E2_main, E2_shadow,
- Y2, x0, echo_path_variability, echo_leakage_detected_);
-
- // Use the power model to estimate the echo.
- // TODO(peah): Remove in upcoming CL.
- // power_echo_model_.EstimateEcho(render_buffer, Y2, aec_state_, &S2_power);
- S2_power.fill(0.f);
+ echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
+ echo_leakage_detected_);
// Choose the linear output.
- output_selector_.FormLinearOutput(e_main, y0);
+ output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
LowestBandRate(sample_rate_hz_), 1);
const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;
// Estimate the residual echo power.
- residual_echo_estimator_.Estimate(
- output_selector_.UseSubtractorOutput(), aec_state_, render_buffer,
- subtractor_.FilterFrequencyResponse(), E2_main, E2_shadow, S2_linear,
- S2_power, Y2, &R2);
+ residual_echo_estimator_.Estimate(output_selector_.UseSubtractorOutput(),
+ aec_state_, render_buffer, S2_linear, Y2,
+ &R2);
// Estimate the comfort noise.
cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
- // Detect basic doubletalk.
- const bool doubletalk = BlockPower(e_shadow) < BlockPower(e_main);
-
// A choose and apply echo suppression gain.
suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),
- doubletalk ? 0.001f : 0.0001f, &G);
- suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, y);
+ aec_state_.SaturatedEcho(), x, y->size(),
+ &high_bands_gain, &G);
+ suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
+ high_bands_gain, y);
// Update the metrics.
metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);
@@ -217,21 +197,16 @@
LowestBandRate(sample_rate_hz_), 1);
data_dumper_->DumpRaw("aec3_using_subtractor_output",
output_selector_.UseSubtractorOutput() ? 1 : 0);
- data_dumper_->DumpRaw("aec3_doubletalk", doubletalk ? 1 : 0);
data_dumper_->DumpRaw("aec3_E2", E2);
data_dumper_->DumpRaw("aec3_E2_main", E2_main);
data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow);
data_dumper_->DumpRaw("aec3_S2_linear", S2_linear);
- data_dumper_->DumpRaw("aec3_S2_power", S2_power);
data_dumper_->DumpRaw("aec3_Y2", Y2);
+ data_dumper_->DumpRaw("aec3_X2", render_buffer.Spectrum(0));
data_dumper_->DumpRaw("aec3_R2", R2);
data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle());
data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl());
- data_dumper_->DumpRaw("aec3_reliable_filter_bands",
- aec_state_.BandsWithReliableFilter());
data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender());
- data_dumper_->DumpRaw("aec3_model_based_aec_feasible",
- aec_state_.ModelBasedAecFeasible());
data_dumper_->DumpRaw("aec3_usable_linear_estimate",
aec_state_.UsableLinearEstimate());
data_dumper_->DumpRaw(
diff --git a/modules/audio_processing/aec3/echo_remover_metrics.cc b/modules/audio_processing/aec3/echo_remover_metrics.cc
index 16a36f4..ed11959 100644
--- a/modules/audio_processing/aec3/echo_remover_metrics.cc
+++ b/modules/audio_processing/aec3/echo_remover_metrics.cc
@@ -222,9 +222,6 @@
"WebRTC.Audio.EchoCanceller.UsableLinearEstimate",
static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0));
RTC_HISTOGRAM_BOOLEAN(
- "WebRTC.Audio.EchoCanceller.ModelBasedAecFeasible",
- static_cast<int>(aec_state.ModelBasedAecFeasible() ? 1 : 0));
- RTC_HISTOGRAM_BOOLEAN(
"WebRTC.Audio.EchoCanceller.ActiveRender",
static_cast<int>(
active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0));
diff --git a/modules/audio_processing/aec3/main_filter_update_gain.cc b/modules/audio_processing/aec3/main_filter_update_gain.cc
index dad1a7a..9cfb08b 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain.cc
@@ -49,13 +49,12 @@
FftData* gain_fft) {
RTC_DCHECK(gain_fft);
// Introducing shorter notation to improve readability.
- const RenderBuffer& X_buffer = render_buffer;
const FftData& E_main = subtractor_output.E_main;
const auto& E2_main = subtractor_output.E2_main;
const auto& E2_shadow = subtractor_output.E2_shadow;
FftData* G = gain_fft;
const size_t size_partitions = filter.SizePartitions();
- const auto& X2 = X_buffer.SpectralSum(size_partitions);
+ const auto& X2 = render_buffer.SpectralSum(size_partitions);
const auto& erl = filter.Erl();
++call_counter_;
@@ -70,16 +69,15 @@
G->re.fill(0.f);
G->im.fill(0.f);
} else {
- // Corresponds of WGN of power -46 dBFS.
- constexpr float kX2Min = 44015068.0f;
+ // Corresponds to WGN of power -39 dBFS.
+ constexpr float kNoiseGatePower = 220075344.f;
std::array<float, kFftLengthBy2Plus1> mu;
// mu = H_error / (0.5* H_error* X2 + n * E2).
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
- mu[k] =
- X2[k] > kX2Min
- ? H_error_[k] /
- (0.5f * H_error_[k] * X2[k] + size_partitions * E2_main[k])
- : 0.f;
+ mu[k] = X2[k] > kNoiseGatePower
+ ? H_error_[k] / (0.5f * H_error_[k] * X2[k] +
+ size_partitions * E2_main[k])
+ : 0.f;
}
// Avoid updating the filter close to narrow bands in the render signals.
diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 6ee34cd..2a4d4d6 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/main_filter_update_gain.h"
-// TODO(peah): Reactivate once the next CL has landed.
-#if 0
-
#include <algorithm>
#include <numeric>
#include <string>
@@ -20,7 +17,7 @@
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
-#include "webrtc/modules/audio_processing/aec3/fft_buffer.h"
+#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
#include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h"
#include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h"
#include "webrtc/modules/audio_processing/aec3/subtractor_output.h"
@@ -42,31 +39,30 @@
std::array<float, kBlockSize>* y_last_block,
FftData* G_last_block) {
ApmDataDumper data_dumper(42);
- AdaptiveFirFilter main_filter(9, true, DetectOptimization(), &data_dumper);
- AdaptiveFirFilter shadow_filter(9, true, DetectOptimization(), &data_dumper);
+ AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper);
+ AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper);
Aec3Fft fft;
- FftBuffer X_buffer(Aec3Optimization::kNone, main_filter.SizePartitions(),
- std::vector<size_t>(1, main_filter.SizePartitions()));
+ RenderBuffer render_buffer(
+ Aec3Optimization::kNone, 3, main_filter.SizePartitions(),
+ std::vector<size_t>(1, main_filter.SizePartitions()));
std::array<float, kBlockSize> x_old;
x_old.fill(0.f);
ShadowFilterUpdateGain shadow_gain;
MainFilterUpdateGain main_gain;
Random random_generator(42U);
- std::vector<float> x(kBlockSize, 0.f);
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<float> y(kBlockSize, 0.f);
AecState aec_state;
RenderSignalAnalyzer render_signal_analyzer;
- FftData X;
std::array<float, kFftLength> s;
FftData S;
FftData G;
SubtractorOutput output;
output.Reset();
FftData& E_main = output.E_main;
- FftData& E_shadow = output.E_shadow;
+ FftData E_shadow;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1>& E2_main = output.E2_main;
- std::array<float, kFftLengthBy2Plus1>& E2_shadow = output.E2_shadow;
std::array<float, kBlockSize>& e_main = output.e_main;
std::array<float, kBlockSize>& e_shadow = output.e_shadow;
Y2.fill(0.f);
@@ -89,17 +85,16 @@
// Create the render signal.
if (use_silent_render_in_second_half && k > num_blocks_to_process / 2) {
- std::fill(x.begin(), x.end(), 0.f);
+ std::fill(x[0].begin(), x[0].end(), 0.f);
} else {
- RandomizeSampleVector(&random_generator, x);
+ RandomizeSampleVector(&random_generator, x[0]);
}
- delay_buffer.Delay(x, y);
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
- render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay());
+ delay_buffer.Delay(x[0], y);
+ render_buffer.Insert(x);
+ render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
// Apply the main filter.
- main_filter.Filter(X_buffer, &S);
+ main_filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
e_main.begin(),
@@ -110,7 +105,7 @@
fft.ZeroPaddedFft(e_main, &E_main);
// Apply the shadow filter.
- shadow_filter.Filter(X_buffer, &S);
+ shadow_filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
e_shadow.begin(),
@@ -125,19 +120,20 @@
E_shadow.Spectrum(Aec3Optimization::kNone, &output.E2_shadow);
// Adapt the shadow filter.
- shadow_gain.Compute(X_buffer, render_signal_analyzer, E_shadow,
+ shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow,
shadow_filter.SizePartitions(), saturation, &G);
- shadow_filter.Adapt(X_buffer, G);
+ shadow_filter.Adapt(render_buffer, G);
// Adapt the main filter
- main_gain.Compute(X_buffer, render_signal_analyzer, output, main_filter,
- saturation, &G);
- main_filter.Adapt(X_buffer, G);
+ main_gain.Compute(render_buffer, render_signal_analyzer, output,
+ main_filter, saturation, &G);
+ main_filter.Adapt(render_buffer, G);
// Update the delay.
+ aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(main_filter.FilterFrequencyResponse(),
- rtc::Optional<size_t>(), X_buffer, E2_main, E2_shadow, Y2,
- x, EchoPathVariability(false, false), false);
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
+ false);
}
std::copy(e_main.begin(), e_main.end(), e_last_block->begin());
@@ -159,14 +155,16 @@
// Verifies that the check for non-null output gain parameter works.
TEST(MainFilterUpdateGain, NullDataOutputGain) {
ApmDataDumper data_dumper(42);
- AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper);
- FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(),
- std::vector<size_t>(1, filter.SizePartitions()));
+ AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
+ filter.SizePartitions(),
+ std::vector<size_t>(1, filter.SizePartitions()));
RenderSignalAnalyzer analyzer;
SubtractorOutput output;
MainFilterUpdateGain gain;
- EXPECT_DEATH(gain.Compute(X_buffer, analyzer, output, filter, false, nullptr),
- "");
+ EXPECT_DEATH(
+ gain.Compute(render_buffer, analyzer, output, filter, false, nullptr),
+ "");
}
#endif
@@ -288,5 +286,3 @@
}
} // namespace webrtc
-
-#endif
diff --git a/modules/audio_processing/aec3/output_selector.cc b/modules/audio_processing/aec3/output_selector.cc
index a8700cb..966c355 100644
--- a/modules/audio_processing/aec3/output_selector.cc
+++ b/modules/audio_processing/aec3/output_selector.cc
@@ -34,11 +34,6 @@
RTC_DCHECK_EQ(from_y_to_e ? 1.f : 0.f, averaging);
}
-float BlockPower(rtc::ArrayView<const float> x) {
- return std::accumulate(x.begin(), x.end(), 0.f,
- [](float a, float b) -> float { return a + b * b; });
-}
-
} // namespace
OutputSelector::OutputSelector() = default;
@@ -46,24 +41,16 @@
OutputSelector::~OutputSelector() = default;
void OutputSelector::FormLinearOutput(
+ bool use_subtractor_output,
rtc::ArrayView<const float> subtractor_output,
rtc::ArrayView<float> capture) {
RTC_DCHECK_EQ(subtractor_output.size(), capture.size());
rtc::ArrayView<const float>& e_main = subtractor_output;
rtc::ArrayView<float> y = capture;
- const bool subtractor_output_is_best =
- BlockPower(y) > 1.5f * BlockPower(e_main);
- output_change_counter_ = subtractor_output_is_best != use_subtractor_output_
- ? output_change_counter_ + 1
- : 0;
-
- if (subtractor_output_is_best != use_subtractor_output_ &&
- ((subtractor_output_is_best && output_change_counter_ > 3) ||
- (!subtractor_output_is_best && output_change_counter_ > 10))) {
- use_subtractor_output_ = subtractor_output_is_best;
+ if (use_subtractor_output != use_subtractor_output_) {
+ use_subtractor_output_ = use_subtractor_output;
SmoothFrameTransition(use_subtractor_output_, e_main, y);
- output_change_counter_ = 0;
} else if (use_subtractor_output_) {
std::copy(e_main.begin(), e_main.end(), y.begin());
}
diff --git a/modules/audio_processing/aec3/output_selector.h b/modules/audio_processing/aec3/output_selector.h
index 943e547..505bb3f 100644
--- a/modules/audio_processing/aec3/output_selector.h
+++ b/modules/audio_processing/aec3/output_selector.h
@@ -24,7 +24,8 @@
~OutputSelector();
// Forms the most appropriate output signal.
- void FormLinearOutput(rtc::ArrayView<const float> subtractor_output,
+ void FormLinearOutput(bool use_subtractor_output,
+ rtc::ArrayView<const float> subtractor_output,
rtc::ArrayView<float> capture);
// Returns true if the linear aec output is the one used.
@@ -32,7 +33,6 @@
private:
bool use_subtractor_output_ = false;
- int output_change_counter_ = 0;
RTC_DISALLOW_COPY_AND_ASSIGN(OutputSelector);
};
diff --git a/modules/audio_processing/aec3/output_selector_unittest.cc b/modules/audio_processing/aec3/output_selector_unittest.cc
index 49f671d..717f631 100644
--- a/modules/audio_processing/aec3/output_selector_unittest.cc
+++ b/modules/audio_processing/aec3/output_selector_unittest.cc
@@ -23,49 +23,47 @@
TEST(OutputSelector, ProperSwitching) {
OutputSelector selector;
- constexpr int kNumBlocksToSwitchToSubtractor = 3;
- constexpr int kNumBlocksToSwitchFromSubtractor = 10;
-
- std::array<float, kBlockSize> weaker;
- std::array<float, kBlockSize> stronger;
std::array<float, kBlockSize> y;
std::array<float, kBlockSize> e;
- weaker.fill(10.f);
- stronger.fill(20.f);
-
- bool y_is_weakest = false;
-
- const auto form_e_and_y = [&](bool y_equals_weaker) {
- if (y_equals_weaker) {
- std::copy(weaker.begin(), weaker.end(), y.begin());
- std::copy(stronger.begin(), stronger.end(), e.begin());
- } else {
- std::copy(stronger.begin(), stronger.end(), y.begin());
- std::copy(weaker.begin(), weaker.end(), e.begin());
- }
+ std::array<float, kBlockSize> e_ref;
+ std::array<float, kBlockSize> y_ref;
+ auto init_blocks = [](std::array<float, kBlockSize>* e,
+ std::array<float, kBlockSize>* y) {
+ e->fill(10.f);
+ y->fill(20.f);
};
- for (int k = 0; k < 30; ++k) {
- // Verify that it takes a while for the signals transition to take effect.
- const int num_blocks_to_switch = y_is_weakest
- ? kNumBlocksToSwitchFromSubtractor
- : kNumBlocksToSwitchToSubtractor;
- for (int j = 0; j < num_blocks_to_switch; ++j) {
- form_e_and_y(y_is_weakest);
- selector.FormLinearOutput(e, y);
- EXPECT_EQ(stronger, y);
- EXPECT_EQ(y_is_weakest, selector.UseSubtractorOutput());
- }
+ init_blocks(&e_ref, &y_ref);
- // Verify that the transition block is a mix between the signals.
- form_e_and_y(y_is_weakest);
- selector.FormLinearOutput(e, y);
- EXPECT_NE(weaker, y);
- EXPECT_NE(stronger, y);
- EXPECT_EQ(!y_is_weakest, selector.UseSubtractorOutput());
+ init_blocks(&e, &y);
+ selector.FormLinearOutput(false, e, y);
+ EXPECT_EQ(y_ref, y);
- y_is_weakest = !y_is_weakest;
- }
+ init_blocks(&e, &y);
+ selector.FormLinearOutput(true, e, y);
+ EXPECT_NE(e_ref, y);
+ EXPECT_NE(y_ref, y);
+
+ init_blocks(&e, &y);
+ selector.FormLinearOutput(true, e, y);
+ EXPECT_EQ(e_ref, y);
+
+ init_blocks(&e, &y);
+ selector.FormLinearOutput(true, e, y);
+ EXPECT_EQ(e_ref, y);
+
+ init_blocks(&e, &y);
+ selector.FormLinearOutput(false, e, y);
+ EXPECT_NE(e_ref, y);
+ EXPECT_NE(y_ref, y);
+
+ init_blocks(&e, &y);
+ selector.FormLinearOutput(false, e, y);
+ EXPECT_EQ(y_ref, y);
+
+ init_blocks(&e, &y);
+ selector.FormLinearOutput(false, e, y);
+ EXPECT_EQ(y_ref, y);
}
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/power_echo_model.cc b/modules/audio_processing/aec3/power_echo_model.cc
deleted file mode 100644
index dee03d8..0000000
--- a/modules/audio_processing/aec3/power_echo_model.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-#include "webrtc/modules/audio_processing/aec3/power_echo_model.h"
-
-#include <string.h>
-#include <algorithm>
-
-#include "webrtc/base/optional.h"
-
-namespace webrtc {
-namespace {
-
-// Computes the spectral power over that last 20 frames.
-void RecentMaximum(const RenderBuffer& X_buffer,
- std::array<float, kFftLengthBy2Plus1>* R2) {
- R2->fill(0.f);
- for (size_t j = 0; j < 20; ++j) {
- std::transform(R2->begin(), R2->end(), X_buffer.Spectrum(j).begin(),
- R2->begin(),
- [](float a, float b) { return std::max(a, b); });
- }
-}
-
-constexpr float kHInitial = 10.f;
-constexpr int kUpdateCounterInitial = 300;
-
-} // namespace
-
-PowerEchoModel::PowerEchoModel() {
- H2_.fill(CountedFloat(kHInitial, kUpdateCounterInitial));
-}
-
-PowerEchoModel::~PowerEchoModel() = default;
-
-void PowerEchoModel::HandleEchoPathChange(
- const EchoPathVariability& variability) {
- if (variability.gain_change) {
- H2_.fill(CountedFloat(kHInitial, kUpdateCounterInitial));
- }
-}
-
-void PowerEchoModel::EstimateEcho(
- const RenderBuffer& render_buffer,
- const std::array<float, kFftLengthBy2Plus1>& capture_spectrum,
- const AecState& aec_state,
- std::array<float, kFftLengthBy2Plus1>* echo_spectrum) {
- RTC_DCHECK(echo_spectrum);
-
- const RenderBuffer& X_buffer = render_buffer;
- const auto& Y2 = capture_spectrum;
- std::array<float, kFftLengthBy2Plus1>* S2 = echo_spectrum;
-
- // Choose delay to use.
- const rtc::Optional<size_t> delay =
- aec_state.FilterDelay()
- ? aec_state.FilterDelay()
- : (aec_state.ExternalDelay() ? rtc::Optional<size_t>(std::min<size_t>(
- *aec_state.ExternalDelay(),
- X_buffer.Buffer().size() - 1))
- : rtc::Optional<size_t>());
-
- // Compute R2.
- std::array<float, kFftLengthBy2Plus1> render_max;
- if (!delay) {
- RecentMaximum(render_buffer, &render_max);
- }
- const std::array<float, kFftLengthBy2Plus1>& X2_active =
- delay ? render_buffer.Spectrum(*delay) : render_max;
-
- if (!aec_state.SaturatedCapture()) {
- // Corresponds of WGN of power -46dBFS.
- constexpr float kX2Min = 44015068.0f;
- const int max_update_counter_value = delay ? 300 : 500;
-
- std::array<float, kFftLengthBy2Plus1> new_H2;
-
- // new_H2 = Y2 / X2.
- std::transform(X2_active.begin(), X2_active.end(), Y2.begin(),
- new_H2.begin(),
- [&](float a, float b) { return a > kX2Min ? b / a : -1.f; });
-
- // Lambda for updating H2 in a maximum statistics manner.
- auto H2_updater = [&](float a, CountedFloat b) {
- if (a > 0) {
- if (a > b.value) {
- b.counter = max_update_counter_value;
- b.value = a;
- } else if (--b.counter <= 0) {
- b.value = std::max(b.value * 0.9f, 1.f);
- }
- }
- return b;
- };
-
- std::transform(new_H2.begin(), new_H2.end(), H2_.begin(), H2_.begin(),
- H2_updater);
- }
-
- // S2 = H2*X2_active.
- std::transform(H2_.begin(), H2_.end(), X2_active.begin(), S2->begin(),
- [](CountedFloat a, float b) { return a.value * b; });
-}
-
-} // namespace webrtc
diff --git a/modules/audio_processing/aec3/power_echo_model.h b/modules/audio_processing/aec3/power_echo_model.h
deleted file mode 100644
index 9487e92..0000000
--- a/modules/audio_processing/aec3/power_echo_model.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_
-
-#include <array>
-
-#include "webrtc/base/constructormagic.h"
-#include "webrtc/base/optional.h"
-#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
-#include "webrtc/modules/audio_processing/aec3/aec_state.h"
-#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"
-#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
-
-namespace webrtc {
-
-// Provides an echo model based on power spectral estimates that estimates the
-// echo spectrum.
-class PowerEchoModel {
- public:
- PowerEchoModel();
- ~PowerEchoModel();
-
- // Ajusts the model according to echo path changes.
- void HandleEchoPathChange(const EchoPathVariability& variability);
-
- // Updates the echo model and estimates the echo spectrum.
- void EstimateEcho(
- const RenderBuffer& render_buffer,
- const std::array<float, kFftLengthBy2Plus1>& capture_spectrum,
- const AecState& aec_state,
- std::array<float, kFftLengthBy2Plus1>* echo_spectrum);
-
- // Returns the minimum required farend buffer length.
- size_t MinFarendBufferLength() const { return kRenderBufferSize; }
-
- private:
- // Provides a float value that is coupled with a counter.
- struct CountedFloat {
- CountedFloat() : value(0.f), counter(0) {}
- CountedFloat(float value, int counter) : value(value), counter(counter) {}
- float value;
- int counter;
- };
-
- const size_t kRenderBufferSize = 100;
- std::array<CountedFloat, kFftLengthBy2Plus1> H2_;
-
- RTC_DISALLOW_COPY_AND_ASSIGN(PowerEchoModel);
-};
-} // namespace webrtc
-
-#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_
diff --git a/modules/audio_processing/aec3/power_echo_model_unittest.cc b/modules/audio_processing/aec3/power_echo_model_unittest.cc
deleted file mode 100644
index f3c3634..0000000
--- a/modules/audio_processing/aec3/power_echo_model_unittest.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "webrtc/modules/audio_processing/aec3/power_echo_model.h"
-
-#include <array>
-#include <string>
-#include <vector>
-
-#include "webrtc/base/random.h"
-#include "webrtc/modules/audio_processing/aec3/aec_state.h"
-#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
-#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
-#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"
-#include "webrtc/modules/audio_processing/test/echo_canceller_test_tools.h"
-
-#include "webrtc/test/gtest.h"
-
-namespace webrtc {
-
-#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
-
-// Verifies that the check for non-null output parameter works.
-TEST(PowerEchoModel, NullEstimateEchoOutput) {
- PowerEchoModel model;
- std::array<float, kFftLengthBy2Plus1> Y2;
- AecState aec_state;
- RenderBuffer X_buffer(Aec3Optimization::kNone, 3,
- model.MinFarendBufferLength(),
- std::vector<size_t>(1, model.MinFarendBufferLength()));
-
- EXPECT_DEATH(model.EstimateEcho(X_buffer, Y2, aec_state, nullptr), "");
-}
-
-#endif
-
-
-} // namespace webrtc
diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc
index f53a925..cf3e248 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -102,10 +102,11 @@
RenderDelayBufferImpl::RenderDelayBufferImpl(size_t num_bands)
: optimization_(DetectOptimization()),
- fft_buffer_(optimization_,
- num_bands,
- std::max(30, kAdaptiveFilterLength),
- std::vector<size_t>(1, kAdaptiveFilterLength)),
+ fft_buffer_(
+ optimization_,
+ num_bands,
+ std::max(kResidualEchoPowerRenderWindowSize, kAdaptiveFilterLength),
+ std::vector<size_t>(1, kAdaptiveFilterLength)),
api_call_jitter_buffer_(num_bands) {
buffer_.fill(std::vector<std::vector<float>>(
num_bands, std::vector<float>(kBlockSize, 0.f)));
@@ -175,23 +176,19 @@
// If there is a new delay set, clear the fft buffer.
fft_buffer_.Clear();
- const size_t max_delay = buffer_.size() - 1;
- if (max_delay < delay) {
+ if ((buffer_.size() - 1) < delay) {
// If the desired delay is larger than the delay buffer, shorten the delay
// buffer size to achieve the desired alignment with the available buffer
// size.
- const size_t delay_decrease = delay - max_delay;
- RTC_DCHECK_LT(delay_decrease, buffer_.size());
-
downsampled_render_buffer_.position =
- (downsampled_render_buffer_.position + kSubBlockSize * delay_decrease) %
+ (downsampled_render_buffer_.position +
+ kSubBlockSize * (delay - (buffer_.size() - 1))) %
downsampled_render_buffer_.buffer.size();
last_insert_index_ =
- (last_insert_index_ + buffer_.size() - delay_decrease) % buffer_.size();
-
- RTC_DCHECK_EQ(max_delay, delay_ - delay_decrease);
- delay_ = max_delay;
+ (last_insert_index_ - (delay - (buffer_.size() - 1)) + buffer_.size()) %
+ buffer_.size();
+ delay_ = buffer_.size() - 1;
} else {
delay_ = delay;
}
diff --git a/modules/audio_processing/aec3/render_delay_controller.cc b/modules/audio_processing/aec3/render_delay_controller.cc
index c19945d..3f7b108 100644
--- a/modules/audio_processing/aec3/render_delay_controller.cc
+++ b/modules/audio_processing/aec3/render_delay_controller.cc
@@ -110,7 +110,7 @@
// Compute and set new render delay buffer delay.
const size_t new_delay =
ComputeNewBufferDelay(delay_, echo_path_delay_samples_);
- if (new_delay != delay_ && align_call_counter_ > 250) {
+ if (new_delay != delay_ && align_call_counter_ > kNumBlocksPerSecond) {
delay_ = new_delay;
}
@@ -119,7 +119,7 @@
const int headroom = echo_path_delay_samples_ - delay_ * kBlockSize;
RTC_DCHECK_LE(0, headroom);
headroom_samples_ = rtc::Optional<size_t>(headroom);
- } else if (++blocks_since_last_delay_estimate_ > 250 * 20) {
+ } else if (++blocks_since_last_delay_estimate_ > 20 * kNumBlocksPerSecond) {
headroom_samples_ = rtc::Optional<size_t>();
}
diff --git a/modules/audio_processing/aec3/render_delay_controller_metrics.cc b/modules/audio_processing/aec3/render_delay_controller_metrics.cc
index b84b916..d0330cb 100644
--- a/modules/audio_processing/aec3/render_delay_controller_metrics.cc
+++ b/modules/audio_processing/aec3/render_delay_controller_metrics.cc
@@ -52,7 +52,7 @@
delay_blocks_ = delay_blocks;
}
}
- } else if (++initial_call_counter_ == 5 * 250) {
+ } else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) {
initial_update = false;
}
diff --git a/modules/audio_processing/aec3/render_signal_analyzer.h b/modules/audio_processing/aec3/render_signal_analyzer.h
index 9eba03e..a791f4d 100644
--- a/modules/audio_processing/aec3/render_signal_analyzer.h
+++ b/modules/audio_processing/aec3/render_signal_analyzer.h
@@ -28,7 +28,7 @@
~RenderSignalAnalyzer();
// Updates the render signal analysis with the most recent render signal.
- void Update(const RenderBuffer& X_buffer,
+ void Update(const RenderBuffer& render_buffer,
const rtc::Optional<size_t>& delay_partitions);
// Returns true if the render signal is poorly exciting.
diff --git a/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc b/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
index 345f6c9..9b25f18 100644
--- a/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
+++ b/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
@@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h"
-// TODO(peah): Reactivate once the next CL has landed.
-#if 0
-
#include <math.h>
#include <array>
#include <vector>
@@ -21,8 +18,8 @@
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
-#include "webrtc/modules/audio_processing/aec3/fft_buffer.h"
#include "webrtc/modules/audio_processing/aec3/fft_data.h"
+#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
#include "webrtc/modules/audio_processing/test/echo_canceller_test_tools.h"
#include "webrtc/test/gtest.h"
@@ -59,19 +56,20 @@
TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
RenderSignalAnalyzer analyzer;
Random random_generator(42U);
- std::vector<float> x(kBlockSize, 0.f);
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::array<float, kBlockSize> x_old;
FftData X;
Aec3Fft fft;
- FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector<size_t>(1, 1));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
+ std::vector<size_t>(1, 1));
std::array<float, kFftLengthBy2Plus1> mask;
x_old.fill(0.f);
for (size_t k = 0; k < 100; ++k) {
- RandomizeSampleVector(&random_generator, x);
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
- analyzer.Update(X_buffer, rtc::Optional<size_t>(0));
+ RandomizeSampleVector(&random_generator, x[0]);
+ fft.PaddedFft(x[0], x_old, &X);
+ render_buffer.Insert(x);
+ analyzer.Update(render_buffer, rtc::Optional<size_t>(0));
}
mask.fill(1.f);
@@ -85,11 +83,11 @@
TEST(RenderSignalAnalyzer, NarrowBandDetection) {
RenderSignalAnalyzer analyzer;
Random random_generator(42U);
- std::vector<float> x(kBlockSize, 0.f);
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::array<float, kBlockSize> x_old;
- FftData X;
Aec3Fft fft;
- FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector<size_t>(1, 1));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
+ std::vector<size_t>(1, 1));
std::array<float, kFftLengthBy2Plus1> mask;
x_old.fill(0.f);
constexpr int kSinusFrequencyBin = 32;
@@ -98,12 +96,10 @@
size_t sample_counter = 0;
for (size_t k = 0; k < 100; ++k) {
ProduceSinusoid(16000, 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2,
- &sample_counter, x);
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
- analyzer.Update(
- X_buffer,
- known_delay ? rtc::Optional<size_t>(0) : rtc::Optional<size_t>());
+ &sample_counter, x[0]);
+ render_buffer.Insert(x);
+ analyzer.Update(render_buffer, known_delay ? rtc::Optional<size_t>(0)
+ : rtc::Optional<size_t>());
}
};
@@ -124,5 +120,3 @@
}
} // namespace webrtc
-
-#endif
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 993a8da..fd848d3 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -10,7 +10,7 @@
#include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"
-#include <math.h>
+#include <numeric>
#include <vector>
#include "webrtc/base/checks.h"
@@ -18,143 +18,75 @@
namespace webrtc {
namespace {
-constexpr float kSaturationLeakageFactor = 10.f;
-constexpr size_t kSaturationLeakageBlocks = 10;
-constexpr size_t kEchoPathChangeConvergenceBlocks = 3 * 250;
-
-// Estimates the residual echo power when there is no detection correlation
-// between the render and capture signals.
-void InfiniteErlPowerEstimate(
- size_t active_render_blocks,
- size_t blocks_since_last_saturation,
- const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
- std::array<float, kFftLengthBy2Plus1>* R2) {
- if (active_render_blocks > 20 * 250) {
- // After an amount of active render samples for which an echo should have
- // been detected in the capture signal if the ERL was not infinite, set the
- // residual echo to 0.
- R2->fill(0.f);
- } else {
- // Before certainty has been reached about the presence of echo, use the
- // fallback echo power estimate as the residual echo estimate. Add a leakage
- // factor when there is saturation.
- std::copy(S2_fallback.begin(), S2_fallback.end(), R2->begin());
- if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
- std::for_each(R2->begin(), R2->end(),
- [](float& a) { a *= kSaturationLeakageFactor; });
- }
+// Estimates the echo generating signal power as gated maximal power over a time
+// window.
+void EchoGeneratingPower(const RenderBuffer& render_buffer,
+ size_t min_delay,
+ size_t max_delay,
+ std::array<float, kFftLengthBy2Plus1>* X2) {
+ X2->fill(0.f);
+ for (size_t k = min_delay; k <= max_delay; ++k) {
+ std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
+ X2->begin(),
+ [](float a, float b) { return std::max(a, b); });
}
+
+ // Apply soft noise gate of -78 dBFS.
+ constexpr float kNoiseGatePower = 27509.42f;
+ std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) {
+ if (kNoiseGatePower > a) {
+ a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
+ }
+ });
}
-// Estimates the echo power in an half-duplex manner.
-void HalfDuplexPowerEstimate(bool active_render,
- const std::array<float, kFftLengthBy2Plus1>& Y2,
- std::array<float, kFftLengthBy2Plus1>* R2) {
- // Set the residual echo power to the power of the capture signal.
- if (active_render) {
- std::copy(Y2.begin(), Y2.end(), R2->begin());
- } else {
- R2->fill(0.f);
- }
-}
-
-// Estimates the residual echo power based on gains.
-void GainBasedPowerEstimate(
- size_t external_delay,
- const RenderBuffer& X_buffer,
- size_t blocks_since_last_saturation,
- size_t active_render_blocks,
- const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter,
- const std::array<float, kFftLengthBy2Plus1>& echo_path_gain,
- const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
- std::array<float, kFftLengthBy2Plus1>* R2) {
- const auto& X2 = X_buffer.Spectrum(external_delay);
-
- // Base the residual echo power on gain of the linear echo path estimate if
- // that is reliable, otherwise use the fallback echo path estimate. Add a
- // leakage factor when there is saturation.
- if (active_render_blocks > kEchoPathChangeConvergenceBlocks) {
- for (size_t k = 0; k < R2->size(); ++k) {
- (*R2)[k] = bands_with_reliable_filter[k] ? echo_path_gain[k] * X2[k]
- : S2_fallback[k];
- }
- } else {
- for (size_t k = 0; k < R2->size(); ++k) {
- (*R2)[k] = S2_fallback[k];
- }
- }
-
- if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
- std::for_each(R2->begin(), R2->end(),
- [](float& a) { a *= kSaturationLeakageFactor; });
- }
-}
-
-// Estimates the residual echo power based on the linear echo path.
-void ErleBasedPowerEstimate(
- bool headset_detected,
- const RenderBuffer& X_buffer,
- bool using_subtractor_output,
- size_t linear_filter_based_delay,
- size_t blocks_since_last_saturation,
- bool poorly_aligned_filter,
- const std::array<bool, kFftLengthBy2Plus1>& bands_with_reliable_filter,
- const std::array<float, kFftLengthBy2Plus1>& echo_path_gain,
- const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
+// Estimates the residual echo power based on the erle and the linear power
+// estimate.
+void LinearResidualPowerEstimate(
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
- const std::array<float, kFftLengthBy2Plus1>& Y2,
const std::array<float, kFftLengthBy2Plus1>& erle,
- const std::array<float, kFftLengthBy2Plus1>& erl,
+ std::array<int, kFftLengthBy2Plus1>* R2_hold_counter,
std::array<float, kFftLengthBy2Plus1>* R2) {
- // Residual echo power after saturation.
- if (blocks_since_last_saturation < kSaturationLeakageBlocks) {
- for (size_t k = 0; k < R2->size(); ++k) {
- (*R2)[k] = kSaturationLeakageFactor *
- (bands_with_reliable_filter[k] && using_subtractor_output
- ? S2_linear[k]
- : std::min(S2_fallback[k], Y2[k]));
- }
- return;
- }
+ std::fill(R2_hold_counter->begin(), R2_hold_counter->end(), 10.f);
+ std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
+ [](float a, float b) {
+ RTC_DCHECK_LT(0.f, a);
+ return b / a;
+ });
+}
- // Residual echo power when a headset is used.
- if (headset_detected) {
- const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay);
- for (size_t k = 0; k < R2->size(); ++k) {
- RTC_DCHECK_LT(0.f, erle[k]);
- (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
- ? S2_linear[k] / erle[k]
- : std::min(S2_fallback[k], Y2[k]);
- (*R2)[k] = std::min((*R2)[k], X2[k] * erl[k]);
- }
- return;
- }
+// Estimates the residual echo power based on the estimate of the echo path
+// gain.
+void NonLinearResidualPowerEstimate(
+ const std::array<float, kFftLengthBy2Plus1>& X2,
+ const std::array<float, kFftLengthBy2Plus1>& Y2,
+ const std::array<float, kFftLengthBy2Plus1>& R2_old,
+ std::array<int, kFftLengthBy2Plus1>* R2_hold_counter,
+ std::array<float, kFftLengthBy2Plus1>* R2) {
+ // Compute preliminary residual echo.
+ // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to
+ // 20 dB.
+ std::transform(X2.begin(), X2.end(), R2->begin(),
+ [](float a) { return a * kFixedEchoPathGain; });
- // Residual echo power when the adaptive filter is poorly aligned.
- if (poorly_aligned_filter) {
- for (size_t k = 0; k < R2->size(); ++k) {
- (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
- ? S2_linear[k]
- : std::min(S2_fallback[k], Y2[k]);
- }
- return;
- }
-
- // Residual echo power when there is no recent saturation, no headset detected
- // and when the adaptive filter is well aligned.
for (size_t k = 0; k < R2->size(); ++k) {
- RTC_DCHECK_LT(0.f, erle[k]);
- const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay);
- (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output
- ? S2_linear[k] / erle[k]
- : std::min(echo_path_gain[k] * X2[k], Y2[k]);
+ // Update hold counter.
+ (*R2_hold_counter)[k] =
+ R2_old[k] < (*R2)[k] ? 0 : (*R2_hold_counter)[k] + 1;
+
+ // Compute the residual echo by holding the maximum echo power and applying
+ // an echo fading corresponding to a room with an RT60 value of about 50 ms.
+ (*R2)[k] = (*R2_hold_counter)[k] < 2
+ ? std::max((*R2)[k], R2_old[k])
+ : std::min((*R2)[k] + R2_old[k] * 0.1f, Y2[k]);
}
}
} // namespace
ResidualEchoEstimator::ResidualEchoEstimator() {
- echo_path_gain_.fill(100.f);
+ R2_old_.fill(0.f);
+ R2_hold_counter_.fill(0);
}
ResidualEchoEstimator::~ResidualEchoEstimator() = default;
@@ -162,71 +94,53 @@
void ResidualEchoEstimator::Estimate(
bool using_subtractor_output,
const AecState& aec_state,
- const RenderBuffer& X_buffer,
- const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
- const std::array<float, kFftLengthBy2Plus1>& E2_main,
- const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
+ const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
- const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2) {
RTC_DCHECK(R2);
- const rtc::Optional<size_t>& linear_filter_based_delay =
- aec_state.FilterDelay();
- // Update the echo path gain.
- if (linear_filter_based_delay) {
- std::copy(H2[*linear_filter_based_delay].begin(),
- H2[*linear_filter_based_delay].end(), echo_path_gain_.begin());
- constexpr float kEchoPathGainHeadroom = 10.f;
- std::for_each(
- echo_path_gain_.begin(), echo_path_gain_.end(),
- [kEchoPathGainHeadroom](float& a) { a *= kEchoPathGainHeadroom; });
+ // Return zero residual echo power when a headset is detected.
+ if (aec_state.HeadsetDetected()) {
+ R2->fill(0.f);
+ R2_old_.fill(0.f);
+ R2_hold_counter_.fill(0.f);
+ return;
}
- // Counts the blocks since saturation.
- if (aec_state.SaturatedCapture()) {
- blocks_since_last_saturation_ = 0;
+ // Estimate the echo generating signal power.
+ std::array<float, kFftLengthBy2Plus1> X2;
+ if (aec_state.ExternalDelay() || aec_state.FilterDelay()) {
+ const int delay =
+ static_cast<int>(aec_state.FilterDelay() ? *aec_state.FilterDelay()
+ : *aec_state.ExternalDelay());
+ // Computes the spectral power over the blocks surrounding the delay.
+ EchoGeneratingPower(
+ render_buffer, std::max(0, delay - 1),
+ std::min(kResidualEchoPowerRenderWindowSize - 1, delay + 1), &X2);
} else {
- ++blocks_since_last_saturation_;
+ // Computes the spectral power over the last 30 blocks.
+ EchoGeneratingPower(render_buffer, 0,
+ kResidualEchoPowerRenderWindowSize - 1, &X2);
}
- const auto& bands_with_reliable_filter = aec_state.BandsWithReliableFilter();
-
- if (aec_state.UsableLinearEstimate()) {
- // Residual echo power estimation when the adaptive filter is reliable.
- RTC_DCHECK(linear_filter_based_delay);
- ErleBasedPowerEstimate(
- aec_state.HeadsetDetected(), X_buffer, using_subtractor_output,
- *linear_filter_based_delay, blocks_since_last_saturation_,
- aec_state.PoorlyAlignedFilter(), bands_with_reliable_filter,
- echo_path_gain_, S2_fallback, S2_linear, Y2, aec_state.Erle(),
- aec_state.Erl(), R2);
- } else if (aec_state.ModelBasedAecFeasible()) {
- // Residual echo power when the adaptive filter is not reliable but still an
- // external echo path delay is provided (and hence can be estimated).
- RTC_DCHECK(aec_state.ExternalDelay());
- GainBasedPowerEstimate(
- *aec_state.ExternalDelay(), X_buffer, blocks_since_last_saturation_,
- aec_state.ActiveRenderBlocks(), bands_with_reliable_filter,
- echo_path_gain_, S2_fallback, R2);
- } else if (aec_state.EchoLeakageDetected()) {
- // Residual echo power when an external residual echo detection algorithm
- // has deemed the echo canceller to leak echoes.
- HalfDuplexPowerEstimate(aec_state.ActiveRender(), Y2, R2);
+ // Estimate the residual echo power.
+ if ((aec_state.UsableLinearEstimate() && using_subtractor_output)) {
+ LinearResidualPowerEstimate(S2_linear, aec_state.Erle(), &R2_hold_counter_,
+ R2);
} else {
- // Residual echo power when none of the other cases are fulfilled.
- InfiniteErlPowerEstimate(aec_state.ActiveRenderBlocks(),
- blocks_since_last_saturation_, S2_fallback, R2);
+ NonLinearResidualPowerEstimate(X2, Y2, R2_old_, &R2_hold_counter_, R2);
}
-}
-void ResidualEchoEstimator::HandleEchoPathChange(
- const EchoPathVariability& echo_path_variability) {
- if (echo_path_variability.AudioPathChanged()) {
- blocks_since_last_saturation_ = 0;
- echo_path_gain_.fill(100.f);
+ // If the echo is saturated, estimate the echo power as the maximum echo power
+ // with a leakage factor.
+ if (aec_state.SaturatedEcho()) {
+ constexpr float kSaturationLeakageFactor = 100.f;
+ R2->fill((*std::max_element(R2->begin(), R2->end())) *
+ kSaturationLeakageFactor);
}
+
+ std::copy(R2->begin(), R2->end(), R2_old_.begin());
}
} // namespace webrtc
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index 1f520af..1334e63 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -30,20 +30,14 @@
void Estimate(bool using_subtractor_output,
const AecState& aec_state,
- const RenderBuffer& X_buffer,
- const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
- const std::array<float, kFftLengthBy2Plus1>& E2_main,
- const std::array<float, kFftLengthBy2Plus1>& E2_shadow,
+ const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
- const std::array<float, kFftLengthBy2Plus1>& S2_fallback,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2);
- void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
-
private:
- std::array<float, kFftLengthBy2Plus1> echo_path_gain_;
- size_t blocks_since_last_saturation_ = 1000;
+ std::array<float, kFftLengthBy2Plus1> R2_old_;
+ std::array<int, kFftLengthBy2Plus1> R2_hold_counter_;
RTC_DISALLOW_COPY_AND_ASSIGN(ResidualEchoEstimator);
};
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index 79e6ff0..824467d 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -10,8 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h"
-// TODO(peah): Reactivate once the next CL has landed.
-#if 0
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
@@ -22,20 +20,16 @@
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
-// Verifies that the check for non-null output gains works.
-TEST(ResidualEchoEstimator, NullOutputGains) {
+// Verifies that the check for non-null output residual echo power works.
+TEST(ResidualEchoEstimator, NullResidualEchoPowerOutput) {
AecState aec_state;
- FftBuffer X_buffer(Aec3Optimization::kNone, 10, std::vector<size_t>(1, 10));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10,
+ std::vector<size_t>(1, 10));
std::vector<std::array<float, kFftLengthBy2Plus1>> H2;
- std::array<float, kFftLengthBy2Plus1> E2_main;
- std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> S2_linear;
- std::array<float, kFftLengthBy2Plus1> S2_fallback;
std::array<float, kFftLengthBy2Plus1> Y2;
-
- EXPECT_DEATH(ResidualEchoEstimator().Estimate(true, aec_state, X_buffer, H2,
- E2_main, E2_shadow, S2_linear,
- S2_fallback, Y2, nullptr),
+ EXPECT_DEATH(ResidualEchoEstimator().Estimate(true, aec_state, render_buffer,
+ S2_linear, Y2, nullptr),
"");
}
@@ -44,7 +38,8 @@
TEST(ResidualEchoEstimator, BasicTest) {
ResidualEchoEstimator estimator;
AecState aec_state;
- FftBuffer X_buffer(Aec3Optimization::kNone, 10, std::vector<size_t>(1, 10));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10,
+ std::vector<size_t>(1, 10));
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
std::array<float, kFftLengthBy2Plus1> S2_linear;
@@ -52,7 +47,7 @@
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> R2;
EchoPathVariability echo_path_variability(false, false);
- std::array<float, kBlockSize> x;
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<std::array<float, kFftLengthBy2Plus1>> H2(10);
Random random_generator(42U);
FftData X;
@@ -63,6 +58,7 @@
H2_k.fill(0.01f);
}
H2[2].fill(10.f);
+ H2[2][0] = 0.1f;
constexpr float kLevel = 10.f;
E2_shadow.fill(kLevel);
@@ -71,21 +67,20 @@
S2_fallback.fill(kLevel);
Y2.fill(kLevel);
- for (int k = 0; k < 100; ++k) {
- RandomizeSampleVector(&random_generator, x);
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
+ for (int k = 0; k < 2000; ++k) {
+ RandomizeSampleVector(&random_generator, x[0]);
+ std::for_each(x[0].begin(), x[0].end(), [](float& a) { a /= 30.f; });
+ fft.PaddedFft(x[0], x_old, &X);
+ render_buffer.Insert(x);
- aec_state.Update(H2, rtc::Optional<size_t>(2), X_buffer, E2_main, E2_shadow,
- Y2, x, echo_path_variability, false);
+ aec_state.HandleEchoPathChange(echo_path_variability);
+ aec_state.Update(H2, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2,
+ x[0], false);
- estimator.Estimate(true, aec_state, X_buffer, H2, E2_main, E2_shadow,
- S2_linear, S2_fallback, Y2, &R2);
+ estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2);
}
std::for_each(R2.begin(), R2.end(),
[&](float a) { EXPECT_NEAR(kLevel, a, 0.1f); });
}
} // namespace webrtc
-
-#endif
diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain.cc b/modules/audio_processing/aec3/shadow_filter_update_gain.cc
index ee6938b..85bc11f 100644
--- a/modules/audio_processing/aec3/shadow_filter_update_gain.cc
+++ b/modules/audio_processing/aec3/shadow_filter_update_gain.cc
@@ -18,7 +18,7 @@
namespace webrtc {
void ShadowFilterUpdateGain::Compute(
- const RenderBuffer& X_buffer,
+ const RenderBuffer& render_buffer,
const RenderSignalAnalyzer& render_signal_analyzer,
const FftData& E_shadow,
size_t size_partitions,
@@ -40,12 +40,14 @@
}
// Compute mu.
- constexpr float kX2Min = 44015068.0f;
+ // Corresponds to WGN of power -39 dBFS.
+ constexpr float kNoiseGatePower = 220075344.f;
constexpr float kMuFixed = .5f;
std::array<float, kFftLengthBy2Plus1> mu;
- const auto& X2 = X_buffer.SpectralSum(size_partitions);
- std::transform(X2.begin(), X2.end(), mu.begin(),
- [&](float a) { return a > kX2Min ? kMuFixed / a : 0.f; });
+ const auto& X2 = render_buffer.SpectralSum(size_partitions);
+ std::transform(X2.begin(), X2.end(), mu.begin(), [&](float a) {
+ return a > kNoiseGatePower ? kMuFixed / a : 0.f;
+ });
// Avoid updating the filter close to narrow bands in the render signals.
render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);
diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain.h b/modules/audio_processing/aec3/shadow_filter_update_gain.h
index 979716e..a67b8fb 100644
--- a/modules/audio_processing/aec3/shadow_filter_update_gain.h
+++ b/modules/audio_processing/aec3/shadow_filter_update_gain.h
@@ -22,7 +22,7 @@
class ShadowFilterUpdateGain {
public:
// Computes the gain.
- void Compute(const RenderBuffer& X_buffer,
+ void Compute(const RenderBuffer& render_buffer,
const RenderSignalAnalyzer& render_signal_analyzer,
const FftData& E_shadow,
size_t size_partitions,
diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
index ee4e44a..82850f8 100644
--- a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
@@ -10,9 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h"
-// TODO(peah): Reactivate once the next CL has landed.
-#if 0
-
#include <algorithm>
#include <numeric>
#include <string>
@@ -37,20 +34,20 @@
std::array<float, kBlockSize>* y_last_block,
FftData* G_last_block) {
ApmDataDumper data_dumper(42);
- AdaptiveFirFilter main_filter(9, true, DetectOptimization(), &data_dumper);
- AdaptiveFirFilter shadow_filter(9, true, DetectOptimization(), &data_dumper);
+ AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper);
+ AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper);
Aec3Fft fft;
- FftBuffer X_buffer(Aec3Optimization::kNone, main_filter.SizePartitions(),
- std::vector<size_t>(1, main_filter.SizePartitions()));
+ RenderBuffer render_buffer(
+ Aec3Optimization::kNone, 3, main_filter.SizePartitions(),
+ std::vector<size_t>(1, main_filter.SizePartitions()));
std::array<float, kBlockSize> x_old;
x_old.fill(0.f);
ShadowFilterUpdateGain shadow_gain;
Random random_generator(42U);
- std::vector<float> x(kBlockSize, 0.f);
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<float> y(kBlockSize, 0.f);
AecState aec_state;
RenderSignalAnalyzer render_signal_analyzer;
- FftData X;
std::array<float, kFftLength> s;
FftData S;
FftData G;
@@ -67,14 +64,13 @@
k) != blocks_with_saturation.end();
// Create the render signal.
- RandomizeSampleVector(&random_generator, x);
- delay_buffer.Delay(x, y);
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
+ RandomizeSampleVector(&random_generator, x[0]);
+ delay_buffer.Delay(x[0], y);
+ render_buffer.Insert(x);
render_signal_analyzer.Update(
- X_buffer, rtc::Optional<size_t>(delay_samples / kBlockSize));
+ render_buffer, rtc::Optional<size_t>(delay_samples / kBlockSize));
- shadow_filter.Filter(X_buffer, &S);
+ shadow_filter.Filter(render_buffer, &S);
fft.Ifft(S, &s);
std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2,
e_shadow.begin(),
@@ -84,9 +80,9 @@
});
fft.ZeroPaddedFft(e_shadow, &E_shadow);
- shadow_gain.Compute(X_buffer, render_signal_analyzer, E_shadow,
+ shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow,
shadow_filter.SizePartitions(), saturation, &G);
- shadow_filter.Adapt(X_buffer, G);
+ shadow_filter.Adapt(render_buffer, G);
}
std::copy(e_shadow.begin(), e_shadow.end(), e_last_block->begin());
@@ -108,11 +104,12 @@
// Verifies that the check for non-null output gain parameter works.
TEST(ShadowFilterUpdateGain, NullDataOutputGain) {
ApmDataDumper data_dumper(42);
- FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector<size_t>(1, 1));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
+ std::vector<size_t>(1, 1));
RenderSignalAnalyzer analyzer;
FftData E;
ShadowFilterUpdateGain gain;
- EXPECT_DEATH(gain.Compute(X_buffer, analyzer, E, 1, false, nullptr), "");
+ EXPECT_DEATH(gain.Compute(render_buffer, analyzer, E, 1, false, nullptr), "");
}
#endif
@@ -188,5 +185,3 @@
}
} // namespace webrtc
-
-#endif
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index dd1d15e..4b46223 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -20,11 +20,11 @@
namespace {
-void ComputeError(const Aec3Fft& fft,
- const FftData& S,
- rtc::ArrayView<const float> y,
- std::array<float, kBlockSize>* e,
- FftData* E) {
+void PredictionError(const Aec3Fft& fft,
+ const FftData& S,
+ rtc::ArrayView<const float> y,
+ std::array<float, kBlockSize>* e,
+ FftData* E) {
std::array<float, kFftLength> s;
fft.Ifft(S, &s);
constexpr float kScale = 1.0f / kFftLengthBy2;
@@ -37,24 +37,13 @@
}
} // namespace
-std::vector<size_t> Subtractor::NumBlocksInRenderSums() const {
- if (kMainFilterSizePartitions != kShadowFilterSizePartitions) {
- return {kMainFilterSizePartitions, kShadowFilterSizePartitions};
- } else {
- return {kMainFilterSizePartitions};
- }
-}
-
Subtractor::Subtractor(ApmDataDumper* data_dumper,
Aec3Optimization optimization)
: fft_(),
data_dumper_(data_dumper),
optimization_(optimization),
- main_filter_(kMainFilterSizePartitions, true, optimization, data_dumper_),
- shadow_filter_(kShadowFilterSizePartitions,
- false,
- optimization,
- data_dumper_) {
+ main_filter_(kAdaptiveFilterLength, optimization, data_dumper_),
+ shadow_filter_(kAdaptiveFilterLength, optimization, data_dumper_) {
RTC_DCHECK(data_dumper_);
}
@@ -72,42 +61,43 @@
void Subtractor::Process(const RenderBuffer& render_buffer,
const rtc::ArrayView<const float> capture,
const RenderSignalAnalyzer& render_signal_analyzer,
- bool saturation,
+ const AecState& aec_state,
SubtractorOutput* output) {
RTC_DCHECK_EQ(kBlockSize, capture.size());
rtc::ArrayView<const float> y = capture;
- const RenderBuffer& X_buffer = render_buffer;
FftData& E_main = output->E_main;
- FftData& E_shadow = output->E_shadow;
+ FftData E_shadow;
std::array<float, kBlockSize>& e_main = output->e_main;
std::array<float, kBlockSize>& e_shadow = output->e_shadow;
FftData S;
FftData& G = S;
- // Form and analyze the output of the main filter.
- main_filter_.Filter(X_buffer, &S);
- ComputeError(fft_, S, y, &e_main, &E_main);
+ // Form the output of the main filter.
+ main_filter_.Filter(render_buffer, &S);
+ PredictionError(fft_, S, y, &e_main, &E_main);
- // Form and analyze the output of the shadow filter.
- shadow_filter_.Filter(X_buffer, &S);
- ComputeError(fft_, S, y, &e_shadow, &E_shadow);
+ // Form the output of the shadow filter.
+ shadow_filter_.Filter(render_buffer, &S);
+ PredictionError(fft_, S, y, &e_shadow, &E_shadow);
// Compute spectra for future use.
E_main.Spectrum(optimization_, &output->E2_main);
E_shadow.Spectrum(optimization_, &output->E2_shadow);
// Update the main filter.
- G_main_.Compute(X_buffer, render_signal_analyzer, *output, main_filter_,
- saturation, &G);
- main_filter_.Adapt(X_buffer, G);
+ G_main_.Compute(render_buffer, render_signal_analyzer, *output, main_filter_,
+ aec_state.SaturatedCapture(), &G);
+ main_filter_.Adapt(render_buffer, G);
data_dumper_->DumpRaw("aec3_subtractor_G_main", G.re);
data_dumper_->DumpRaw("aec3_subtractor_G_main", G.im);
// Update the shadow filter.
- G_shadow_.Compute(X_buffer, render_signal_analyzer, E_shadow,
- shadow_filter_.SizePartitions(), saturation, &G);
- shadow_filter_.Adapt(X_buffer, G);
+ G_shadow_.Compute(render_buffer, render_signal_analyzer, E_shadow,
+ shadow_filter_.SizePartitions(),
+ aec_state.SaturatedCapture(), &G);
+ shadow_filter_.Adapt(render_buffer, G);
+
data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re);
data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.im);
diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h
index 671f6c8..a127141 100644
--- a/modules/audio_processing/aec3/subtractor.h
+++ b/modules/audio_processing/aec3/subtractor.h
@@ -19,6 +19,7 @@
#include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
#include "webrtc/modules/audio_processing/aec3/aec3_fft.h"
+#include "webrtc/modules/audio_processing/aec3/aec_state.h"
#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h"
#include "webrtc/modules/audio_processing/aec3/main_filter_update_gain.h"
#include "webrtc/modules/audio_processing/aec3/render_buffer.h"
@@ -39,18 +40,9 @@
void Process(const RenderBuffer& render_buffer,
const rtc::ArrayView<const float> capture,
const RenderSignalAnalyzer& render_signal_analyzer,
- bool saturation,
+ const AecState& aec_state,
SubtractorOutput* output);
- // Returns a vector with the number of blocks included in the render buffer
- // sums.
- std::vector<size_t> NumBlocksInRenderSums() const;
-
- // Returns the minimum required farend buffer length.
- size_t MinFarendBufferLength() const {
- return std::max(kMainFilterSizePartitions, kShadowFilterSizePartitions);
- }
-
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
// Returns the block-wise frequency response of the main adaptive filter.
@@ -60,9 +52,6 @@
}
private:
- const size_t kMainFilterSizePartitions = 12;
- const size_t kShadowFilterSizePartitions = 12;
-
const Aec3Fft fft_;
ApmDataDumper* data_dumper_;
const Aec3Optimization optimization_;
diff --git a/modules/audio_processing/aec3/subtractor_output.h b/modules/audio_processing/aec3/subtractor_output.h
index 90b9065..e2d23b5 100644
--- a/modules/audio_processing/aec3/subtractor_output.h
+++ b/modules/audio_processing/aec3/subtractor_output.h
@@ -23,7 +23,6 @@
std::array<float, kBlockSize> e_main;
std::array<float, kBlockSize> e_shadow;
FftData E_main;
- FftData E_shadow;
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
@@ -32,8 +31,6 @@
e_shadow.fill(0.f);
E_main.re.fill(0.f);
E_main.im.fill(0.f);
- E_shadow.re.fill(0.f);
- E_shadow.im.fill(0.f);
E2_main.fill(0.f);
E2_shadow.fill(0.f);
}
diff --git a/modules/audio_processing/aec3/subtractor_unittest.cc b/modules/audio_processing/aec3/subtractor_unittest.cc
index 34a9ae4..48c9c57 100644
--- a/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -10,8 +10,6 @@
#include "webrtc/modules/audio_processing/aec3/subtractor.h"
-// TODO(peah): Reactivate once the next CL has landed.
-#if 0
#include <algorithm>
#include <numeric>
#include <string>
@@ -30,17 +28,15 @@
const std::vector<int>& blocks_with_echo_path_changes) {
ApmDataDumper data_dumper(42);
Subtractor subtractor(&data_dumper, DetectOptimization());
- std::vector<float> x(kBlockSize, 0.f);
+ std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
std::vector<float> y(kBlockSize, 0.f);
std::array<float, kBlockSize> x_old;
SubtractorOutput output;
- FftBuffer X_buffer(
- Aec3Optimization::kNone, subtractor.MinFarendBufferLength(),
- std::vector<size_t>(1, subtractor.MinFarendBufferLength()));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength,
+ std::vector<size_t>(1, kAdaptiveFilterLength));
RenderSignalAnalyzer render_signal_analyzer;
Random random_generator(42U);
Aec3Fft fft;
- FftData X;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> E2_main;
std::array<float, kFftLengthBy2Plus1> E2_shadow;
@@ -52,15 +48,14 @@
DelayBuffer<float> delay_buffer(delay_samples);
for (int k = 0; k < num_blocks_to_process; ++k) {
- RandomizeSampleVector(&random_generator, x);
+ RandomizeSampleVector(&random_generator, x[0]);
if (uncorrelated_inputs) {
RandomizeSampleVector(&random_generator, y);
} else {
- delay_buffer.Delay(x, y);
+ delay_buffer.Delay(x[0], y);
}
- fft.PaddedFft(x, x_old, &X);
- X_buffer.Insert(X);
- render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay());
+ render_buffer.Insert(x);
+ render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
// Handle echo path changes.
if (std::find(blocks_with_echo_path_changes.begin(),
@@ -68,12 +63,13 @@
k) != blocks_with_echo_path_changes.end()) {
subtractor.HandleEchoPathChange(EchoPathVariability(true, true));
}
- subtractor.Process(X_buffer, y, render_signal_analyzer, false, &output);
+ subtractor.Process(render_buffer, y, render_signal_analyzer, aec_state,
+ &output);
+ aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(subtractor.FilterFrequencyResponse(),
rtc::Optional<size_t>(delay_samples / kBlockSize),
- X_buffer, E2_main, E2_shadow, Y2, x,
- EchoPathVariability(false, false), false);
+ render_buffer, E2_main, Y2, x[0], false);
}
const float output_power = std::inner_product(
@@ -107,31 +103,29 @@
TEST(Subtractor, DISABLED_NullOutput) {
ApmDataDumper data_dumper(42);
Subtractor subtractor(&data_dumper, DetectOptimization());
- FftBuffer X_buffer(
- Aec3Optimization::kNone, subtractor.MinFarendBufferLength(),
- std::vector<size_t>(1, subtractor.MinFarendBufferLength()));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength,
+ std::vector<size_t>(1, kAdaptiveFilterLength));
RenderSignalAnalyzer render_signal_analyzer;
std::vector<float> y(kBlockSize, 0.f);
- EXPECT_DEATH(
- subtractor.Process(X_buffer, y, render_signal_analyzer, false, nullptr),
- "");
+ EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer,
+ AecState(), nullptr),
+ "");
}
// Verifies the check for the capture signal size.
TEST(Subtractor, WrongCaptureSize) {
ApmDataDumper data_dumper(42);
Subtractor subtractor(&data_dumper, DetectOptimization());
- FftBuffer X_buffer(
- Aec3Optimization::kNone, subtractor.MinFarendBufferLength(),
- std::vector<size_t>(1, subtractor.MinFarendBufferLength()));
+ RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength,
+ std::vector<size_t>(1, kAdaptiveFilterLength));
RenderSignalAnalyzer render_signal_analyzer;
std::vector<float> y(kBlockSize - 1, 0.f);
SubtractorOutput output;
- EXPECT_DEATH(
- subtractor.Process(X_buffer, y, render_signal_analyzer, false, &output),
- "");
+ EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer,
+ AecState(), &output),
+ "");
}
#endif
@@ -175,5 +169,3 @@
}
} // namespace webrtc
-
-#endif
diff --git a/modules/audio_processing/aec3/suppression_filter.cc b/modules/audio_processing/aec3/suppression_filter.cc
index 7f7a8d7..b172a1d 100644
--- a/modules/audio_processing/aec3/suppression_filter.cc
+++ b/modules/audio_processing/aec3/suppression_filter.cc
@@ -74,6 +74,7 @@
const FftData& comfort_noise,
const FftData& comfort_noise_high_band,
const std::array<float, kFftLengthBy2Plus1>& suppression_gain,
+ float high_bands_gain,
std::vector<std::vector<float>>* e) {
RTC_DCHECK(e);
RTC_DCHECK_EQ(e->size(), NumBandsForRate(sample_rate_hz_));
@@ -138,11 +139,7 @@
fft_.Ifft(E, &time_domain_high_band_noise);
// Scale and apply the noise to the signals.
- RTC_DCHECK_LT(3, suppression_gain.size());
- float high_bands_gain = *std::min_element(suppression_gain.begin() + 32,
- suppression_gain.end());
-
- float high_bands_noise_scaling =
+ const float high_bands_noise_scaling =
0.4f * std::max(1.f - high_bands_gain, 0.f);
std::transform(
diff --git a/modules/audio_processing/aec3/suppression_filter.h b/modules/audio_processing/aec3/suppression_filter.h
index 3171047..4aec2fc 100644
--- a/modules/audio_processing/aec3/suppression_filter.h
+++ b/modules/audio_processing/aec3/suppression_filter.h
@@ -27,6 +27,7 @@
void ApplyGain(const FftData& comfort_noise,
const FftData& comfort_noise_high_bands,
const std::array<float, kFftLengthBy2Plus1>& suppression_gain,
+ float high_bands_gain,
std::vector<std::vector<float>>* e);
private:
diff --git a/modules/audio_processing/aec3/suppression_filter_unittest.cc b/modules/audio_processing/aec3/suppression_filter_unittest.cc
index e8710b8..312391b 100644
--- a/modules/audio_processing/aec3/suppression_filter_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_filter_unittest.cc
@@ -44,8 +44,9 @@
FftData cn_high_bands;
std::array<float, kFftLengthBy2Plus1> gain;
- EXPECT_DEATH(
- SuppressionFilter(16000).ApplyGain(cn, cn_high_bands, gain, nullptr), "");
+ EXPECT_DEATH(SuppressionFilter(16000).ApplyGain(cn, cn_high_bands, gain, 1.0f,
+ nullptr),
+ "");
}
// Verifies the check for allowed sample rate.
@@ -70,7 +71,7 @@
std::vector<std::vector<float>> e(3, std::vector<float>(kBlockSize, 0.f));
std::vector<std::vector<float>> e_ref = e;
- filter.ApplyGain(cn, cn_high_bands, gain, &e);
+ filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
for (size_t k = 0; k < e.size(); ++k) {
EXPECT_EQ(e_ref[k], e[k]);
@@ -102,7 +103,7 @@
e[0]);
e0_input =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input);
- filter.ApplyGain(cn, cn_high_bands, gain, &e);
+ filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
e0_output =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output);
}
@@ -136,7 +137,7 @@
e[0]);
e0_input =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input);
- filter.ApplyGain(cn, cn_high_bands, gain, &e);
+ filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
e0_output =
std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output);
}
@@ -166,7 +167,7 @@
}
}
- filter.ApplyGain(cn, cn_high_bands, gain, &e);
+ filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
if (k > 2) {
for (size_t j = 0; j < 2; ++j) {
for (size_t i = 0; i < kBlockSize; ++i) {
diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc
index 74df7d9..0e50292 100644
--- a/modules/audio_processing/aec3/suppression_gain.cc
+++ b/modules/audio_processing/aec3/suppression_gain.cc
@@ -17,6 +17,7 @@
#include <math.h>
#include <algorithm>
#include <functional>
+#include <numeric>
#include "webrtc/base/checks.h"
@@ -33,9 +34,9 @@
// filter on the upper-frequency gains influencing the overall achieved
// gain. TODO(peah): Update this when new anti-aliasing filters are
// implemented.
- constexpr size_t kAntiAliasingImpactLimit = 64 * 0.7f;
+ constexpr size_t kAntiAliasingImpactLimit = (64 * 2000) / 8000;
std::for_each(gain_squared->begin() + kAntiAliasingImpactLimit,
- gain_squared->end(),
+ gain_squared->end() - 1,
[gain_squared, kAntiAliasingImpactLimit](float& a) {
a = std::min(a, (*gain_squared)[kAntiAliasingImpactLimit]);
});
@@ -43,8 +44,8 @@
}
constexpr int kNumIterations = 2;
-constexpr float kEchoMaskingMargin = 1.f / 10.f;
-constexpr float kBandMaskingFactor = 1.f / 2.f;
+constexpr float kEchoMaskingMargin = 1.f / 20.f;
+constexpr float kBandMaskingFactor = 1.f / 10.f;
constexpr float kTimeMaskingFactor = 1.f / 10.f;
} // namespace
@@ -137,8 +138,8 @@
std::transform(gain_squared->begin() + 1, gain_squared->end() - 1,
previous_gain_squared->begin(), gain_squared->begin() + 1,
[](float a, float b) {
- return b < 0.0001f ? std::min(a, 0.0001f)
- : std::min(a, b * 2.f);
+ return b < 0.001f ? std::min(a, 0.001f)
+ : std::min(a, b * 2.f);
});
// Process the gains to avoid artefacts caused by gain realization in the
@@ -249,8 +250,8 @@
std::transform(gain_squared->begin() + 1, gain_squared->end() - 1,
previous_gain_squared->begin(), gain_squared->begin() + 1,
[](float a, float b) {
- return b < 0.0001f ? std::min(a, 0.0001f)
- : std::min(a, b * 2.f);
+ return b < 0.001f ? std::min(a, 0.001f)
+ : std::min(a, b * 2.f);
});
// Process the gains to avoid artefacts caused by gain realization in the
@@ -274,6 +275,43 @@
} // namespace aec3
+// Computes an upper bound on the gain to apply for high frequencies.
+//
+// |saturated_echo| indicates that the echo is saturated, in which case the
+// upper bands are always strongly attenuated. |render| holds the render signal
+// split into bands: render[0] is treated as the lower band and render[1..] as
+// the upper bands. The returned value is the gain bound; 1.f means that the
+// gain in the upper bands is left unbounded.
+float HighFrequencyGainBound(bool saturated_echo,
+                             const std::vector<std::vector<float>>& render) {
+  // With a single band there are no upper bands to bound.
+  if (render.size() == 1) {
+    return 1.f;
+  }
+
+  // Always attenuate the upper bands when there is saturated echo.
+  if (saturated_echo) {
+    return 0.001f;
+  }
+
+  // Without any band data there are no energies to compare. Leave the gain
+  // unbounded rather than reading render[0] out of range.
+  if (render.empty()) {
+    return 1.f;
+  }
+
+  // Energy of one band, computed as the sum of its squared samples.
+  const auto band_energy = [](const std::vector<float>& band) {
+    return std::accumulate(
+        band.begin(), band.end(), 0.f,
+        [](float a, float b) -> float { return a + b * b; });
+  };
+
+  // Compute the lower band energy and the largest energy among the upper
+  // bands.
+  const float low_band_energy = band_energy(render[0]);
+  float high_band_energies = 0.f;
+  for (size_t k = 1; k < render.size(); ++k) {
+    high_band_energies = std::max(high_band_energies, band_energy(render[k]));
+  }
+
+  // If there is more power in the lower frequencies than the upper frequencies,
+  // or if the power in upper frequencies is low, do not bound the gain in the
+  // upper bands. The second condition also guarantees that the division below
+  // never has a zero denominator.
+  if (high_band_energies < low_band_energy ||
+      high_band_energies < kSubBlockSize * 10.f * 10.f) {
+    return 1.f;
+  }
+
+  // In all other cases, bound the gain for upper frequencies. The bound
+  // shrinks as the upper bands become more dominant over the lower band.
+  RTC_DCHECK_LE(low_band_energy, high_band_energies);
+  return 0.01f * sqrtf(low_band_energy / high_band_energies);
+}
+
SuppressionGain::SuppressionGain(Aec3Optimization optimization)
: optimization_(optimization) {
previous_gain_squared_.fill(1.f);
@@ -284,21 +322,41 @@
const std::array<float, kFftLengthBy2Plus1>& nearend_power,
const std::array<float, kFftLengthBy2Plus1>& residual_echo_power,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_power,
- float strong_nearend_margin,
- std::array<float, kFftLengthBy2Plus1>* gain) {
- RTC_DCHECK(gain);
+ bool saturated_echo,
+ const std::vector<std::vector<float>>& render,
+ size_t num_capture_bands,
+ float* high_bands_gain,
+ std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
+ RTC_DCHECK(high_bands_gain);
+ RTC_DCHECK(low_band_gain);
+
+ // Choose margin to use.
+ const float margin = saturated_echo ? 0.001f : 0.01f;
switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2:
- aec3::ComputeGains_SSE2(nearend_power, residual_echo_power,
- comfort_noise_power, strong_nearend_margin,
- &previous_gain_squared_, &previous_masker_, gain);
+ aec3::ComputeGains_SSE2(
+ nearend_power, residual_echo_power, comfort_noise_power, margin,
+ &previous_gain_squared_, &previous_masker_, low_band_gain);
break;
#endif
default:
aec3::ComputeGains(nearend_power, residual_echo_power,
- comfort_noise_power, strong_nearend_margin,
- &previous_gain_squared_, &previous_masker_, gain);
+ comfort_noise_power, margin, &previous_gain_squared_,
+ &previous_masker_, low_band_gain);
+ }
+
+ if (num_capture_bands > 1) {
+ // Compute the gain for upper frequencies.
+ const float min_high_band_gain =
+ HighFrequencyGainBound(saturated_echo, render);
+ *high_bands_gain =
+ *std::min_element(low_band_gain->begin() + 32, low_band_gain->end());
+
+ *high_bands_gain = std::min(*high_bands_gain, min_high_band_gain);
+
+ } else {
+ *high_bands_gain = 1.f;
}
}
diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h
index 4e070b6..6b36a63 100644
--- a/modules/audio_processing/aec3/suppression_gain.h
+++ b/modules/audio_processing/aec3/suppression_gain.h
@@ -12,6 +12,7 @@
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_
#include <array>
+#include <vector>
#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/audio_processing/aec3/aec3_common.h"
@@ -48,8 +49,11 @@
void GetGain(const std::array<float, kFftLengthBy2Plus1>& nearend_power,
const std::array<float, kFftLengthBy2Plus1>& residual_echo_power,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_power,
- float strong_nearend_margin,
- std::array<float, kFftLengthBy2Plus1>* gain);
+ bool saturated_echo,
+ const std::vector<std::vector<float>>& render,
+ size_t num_capture_bands,
+ float* high_bands_gain,
+ std::array<float, kFftLengthBy2Plus1>* low_band_gain);
private:
const Aec3Optimization optimization_;
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index 9d41f18..f4feb74 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -25,9 +25,16 @@
std::array<float, kFftLengthBy2Plus1> E2;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> N2;
- EXPECT_DEATH(
- SuppressionGain(DetectOptimization()).GetGain(E2, R2, N2, 0.1f, nullptr),
- "");
+ E2.fill(0.f);
+ R2.fill(0.f);
+ N2.fill(0.f);
+ float high_bands_gain;
+ EXPECT_DEATH(SuppressionGain(DetectOptimization())
+ .GetGain(E2, R2, N2, false,
+ std::vector<std::vector<float>>(
+ 3, std::vector<float>(kBlockSize, 0.f)),
+ 1, &high_bands_gain, nullptr),
+ "");
}
#endif
@@ -109,17 +116,19 @@
// Does a sanity check that the gains are correctly computed.
TEST(SuppressionGain, BasicGainComputation) {
SuppressionGain suppression_gain(DetectOptimization());
+ float high_bands_gain;
std::array<float, kFftLengthBy2Plus1> E2;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> N2;
std::array<float, kFftLengthBy2Plus1> g;
+ std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f));
// Ensure that a strong noise is detected to mask any echoes.
E2.fill(10.f);
R2.fill(0.1f);
N2.fill(100.f);
for (int k = 0; k < 10; ++k) {
- suppression_gain.GetGain(E2, R2, N2, 0.1f, &g);
+ suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(1.f, a, 0.001); });
@@ -129,7 +138,7 @@
R2.fill(0.1f);
N2.fill(0.f);
for (int k = 0; k < 10; ++k) {
- suppression_gain.GetGain(E2, R2, N2, 0.1f, &g);
+ suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(1.f, a, 0.001); });
@@ -139,7 +148,7 @@
R2.fill(100.f);
N2.fill(0.f);
for (int k = 0; k < 10; ++k) {
- suppression_gain.GetGain(E2, R2, N2, 0.1f, &g);
+ suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(0.f, a, 0.001); });