blob: a5e77092a6d27c5930a7ee8b9348a6039b906027 [file] [log] [blame]
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h"
#include <algorithm>
#include <functional>
#include <numeric>
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
constexpr std::array<size_t, SignalDependentErleEstimator::kSubbands + 1>
kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1};
std::array<size_t, kFftLengthBy2Plus1> FormSubbandMap() {
std::array<size_t, kFftLengthBy2Plus1> map_band_to_subband;
size_t subband = 1;
for (size_t k = 0; k < map_band_to_subband.size(); ++k) {
RTC_DCHECK_LT(subband, kBandBoundaries.size());
if (k >= kBandBoundaries[subband]) {
subband++;
RTC_DCHECK_LT(k, kBandBoundaries[subband]);
}
map_band_to_subband[k] = subband - 1;
}
return map_band_to_subband;
}
// Defines the size in blocks of the sections that are used for dividing the
// linear filter. The sections are split in a non-linear manner so that lower
// sections that typically represent the direct path have a larger resolution
// than the higher sections which typically represent more reverberant acoustic
// paths.
std::vector<size_t> DefineFilterSectionSizes(size_t delay_headroom_blocks,
size_t num_blocks,
size_t num_sections) {
size_t filter_length_blocks = num_blocks - delay_headroom_blocks;
std::vector<size_t> section_sizes(num_sections);
size_t remaining_blocks = filter_length_blocks;
size_t remaining_sections = num_sections;
size_t estimator_size = 2;
size_t idx = 0;
while (remaining_sections > 1 &&
remaining_blocks > estimator_size * remaining_sections) {
RTC_DCHECK_LT(idx, section_sizes.size());
section_sizes[idx] = estimator_size;
remaining_blocks -= estimator_size;
remaining_sections--;
estimator_size *= 2;
idx++;
}
size_t last_groups_size = remaining_blocks / remaining_sections;
for (; idx < num_sections; idx++) {
section_sizes[idx] = last_groups_size;
}
section_sizes[num_sections - 1] +=
remaining_blocks - last_groups_size * remaining_sections;
return section_sizes;
}
// Forms the limits in blocks for each filter section. Those sections
// are used for analyzing the echo estimates and investigating which
// linear filter sections contribute most to the echo estimate energy.
std::vector<size_t> SetSectionsBoundaries(size_t delay_headroom_blocks,
size_t num_blocks,
size_t num_sections) {
std::vector<size_t> estimator_boundaries_blocks(num_sections + 1);
if (estimator_boundaries_blocks.size() == 2) {
estimator_boundaries_blocks[0] = 0;
estimator_boundaries_blocks[1] = num_blocks;
return estimator_boundaries_blocks;
}
RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2);
const std::vector<size_t> section_sizes =
DefineFilterSectionSizes(delay_headroom_blocks, num_blocks,
estimator_boundaries_blocks.size() - 1);
size_t idx = 0;
size_t current_size_block = 0;
RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size());
estimator_boundaries_blocks[0] = delay_headroom_blocks;
for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) {
current_size_block++;
if (current_size_block >= section_sizes[idx]) {
idx = idx + 1;
if (idx == section_sizes.size()) {
break;
}
estimator_boundaries_blocks[idx] = k + 1;
current_size_block = 0;
}
}
estimator_boundaries_blocks[section_sizes.size()] = num_blocks;
return estimator_boundaries_blocks;
}
std::array<float, SignalDependentErleEstimator::kSubbands>
SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) {
std::array<float, SignalDependentErleEstimator::kSubbands> max_erle;
std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l);
std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h);
return max_erle;
}
} // namespace
SignalDependentErleEstimator::SignalDependentErleEstimator(
const EchoCanceller3Config& config,
size_t num_capture_channels)
: min_erle_(config.erle.min),
num_sections_(config.erle.num_sections),
num_blocks_(config.filter.refined.length_blocks),
delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
band_to_subband_(FormSubbandMap()),
max_erle_(SetMaxErleSubbands(config.erle.max_l,
config.erle.max_h,
band_to_subband_[kFftLengthBy2 / 2])),
section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_,
num_blocks_,
num_sections_)),
use_onset_detection_(config.erle.onset_detection),
erle_(num_capture_channels),
erle_onset_compensated_(num_capture_channels),
S2_section_accum_(
num_capture_channels,
std::vector<std::array<float, kFftLengthBy2Plus1>>(num_sections_)),
erle_estimators_(
num_capture_channels,
std::vector<std::array<float, kSubbands>>(num_sections_)),
erle_ref_(num_capture_channels),
correction_factors_(
num_capture_channels,
std::vector<std::array<float, kSubbands>>(num_sections_)),
num_updates_(num_capture_channels),
n_active_sections_(num_capture_channels) {
RTC_DCHECK_LE(num_sections_, num_blocks_);
RTC_DCHECK_GE(num_sections_, 1);
Reset();
}
SignalDependentErleEstimator::~SignalDependentErleEstimator() = default;
void SignalDependentErleEstimator::Reset() {
for (size_t ch = 0; ch < erle_.size(); ++ch) {
erle_[ch].fill(min_erle_);
erle_onset_compensated_[ch].fill(min_erle_);
for (auto& erle_estimator : erle_estimators_[ch]) {
erle_estimator.fill(min_erle_);
}
erle_ref_[ch].fill(min_erle_);
for (auto& factor : correction_factors_[ch]) {
factor.fill(1.0f);
}
num_updates_[ch].fill(0);
n_active_sections_[ch].fill(0);
}
}
// Updates the Erle estimate by analyzing the current input signals. It takes
// the render buffer and the filter frequency response in order to do an
// estimation of the number of sections of the linear filter that are needed
// for getting the majority of the energy in the echo estimate. Based on that
// number of sections, it updates the erle estimation by introducing a
// correction factor to the erle that is given as an input to this method.
void SignalDependentErleEstimator::Update(
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
filter_frequency_responses,
rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
average_erle_onset_compensated,
const std::vector<bool>& converged_filters) {
RTC_DCHECK_GT(num_sections_, 1);
// Gets the number of filter sections that are needed for achieving 90 %
// of the power spectrum energy of the echo estimate.
ComputeNumberOfActiveFilterSections(render_buffer,
filter_frequency_responses);
// Updates the correction factors that is used for correcting the erle and
// adapt it to the particular characteristics of the input signal.
UpdateCorrectionFactors(X2, Y2, E2, converged_filters);
// Applies the correction factor to the input erle for getting a more refined
// erle estimation for the current input signal.
for (size_t ch = 0; ch < erle_.size(); ++ch) {
for (size_t k = 0; k < kFftLengthBy2; ++k) {
RTC_DCHECK_GT(correction_factors_[ch].size(), n_active_sections_[ch][k]);
float correction_factor =
correction_factors_[ch][n_active_sections_[ch][k]]
[band_to_subband_[k]];
erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor,
min_erle_, max_erle_[band_to_subband_[k]]);
if (use_onset_detection_) {
erle_onset_compensated_[ch][k] = rtc::SafeClamp(
average_erle_onset_compensated[ch][k] * correction_factor,
min_erle_, max_erle_[band_to_subband_[k]]);
}
}
}
}
void SignalDependentErleEstimator::Dump(
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
for (auto& erle : erle_estimators_[0]) {
data_dumper->DumpRaw("aec3_all_erle", erle);
}
data_dumper->DumpRaw("aec3_ref_erle", erle_ref_[0]);
for (auto& factor : correction_factors_[0]) {
data_dumper->DumpRaw("aec3_erle_correction_factor", factor);
}
}
// Estimates for each band the smallest number of sections in the filter that
// together constitute 90% of the estimated echo energy.
void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections(
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
filter_frequency_responses) {
RTC_DCHECK_GT(num_sections_, 1);
// Computes an approximation of the power spectrum if the filter would have
// been limited to a certain number of filter sections.
ComputeEchoEstimatePerFilterSection(render_buffer,
filter_frequency_responses);
// For each band, computes the number of filter sections that are needed for
// achieving the 90 % energy in the echo estimate.
ComputeActiveFilterSections();
}
void SignalDependentErleEstimator::UpdateCorrectionFactors(
rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
const std::vector<bool>& converged_filters) {
for (size_t ch = 0; ch < converged_filters.size(); ++ch) {
if (converged_filters[ch]) {
constexpr float kX2BandEnergyThreshold = 44015068.0f;
constexpr float kSmthConstantDecreases = 0.1f;
constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f;
auto subband_powers = [](rtc::ArrayView<const float> power_spectrum,
rtc::ArrayView<float> power_spectrum_subbands) {
for (size_t subband = 0; subband < kSubbands; ++subband) {
RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size());
power_spectrum_subbands[subband] = std::accumulate(
power_spectrum.begin() + kBandBoundaries[subband],
power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f);
}
};
std::array<float, kSubbands> X2_subbands, E2_subbands, Y2_subbands;
subband_powers(X2, X2_subbands);
subband_powers(E2[ch], E2_subbands);
subband_powers(Y2[ch], Y2_subbands);
std::array<size_t, kSubbands> idx_subbands;
for (size_t subband = 0; subband < kSubbands; ++subband) {
// When aggregating the number of active sections in the filter for
// different bands we choose to take the minimum of all of them. As an
// example, if for one of the bands it is the direct path its refined
// contributor to the final echo estimate, we consider the direct path
// is as well the refined contributor for the subband that contains that
// particular band. That aggregate number of sections will be later used
// as the identifier of the erle estimator that needs to be updated.
RTC_DCHECK_LE(kBandBoundaries[subband + 1],
n_active_sections_[ch].size());
idx_subbands[subband] = *std::min_element(
n_active_sections_[ch].begin() + kBandBoundaries[subband],
n_active_sections_[ch].begin() + kBandBoundaries[subband + 1]);
}
std::array<float, kSubbands> new_erle;
std::array<bool, kSubbands> is_erle_updated;
is_erle_updated.fill(false);
new_erle.fill(0.f);
for (size_t subband = 0; subband < kSubbands; ++subband) {
if (X2_subbands[subband] > kX2BandEnergyThreshold &&
E2_subbands[subband] > 0) {
new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband];
RTC_DCHECK_GT(new_erle[subband], 0);
is_erle_updated[subband] = true;
++num_updates_[ch][subband];
}
}
for (size_t subband = 0; subband < kSubbands; ++subband) {
const size_t idx = idx_subbands[subband];
RTC_DCHECK_LT(idx, erle_estimators_[ch].size());
float alpha = new_erle[subband] > erle_estimators_[ch][idx][subband]
? kSmthConstantIncreases
: kSmthConstantDecreases;
alpha = static_cast<float>(is_erle_updated[subband]) * alpha;
erle_estimators_[ch][idx][subband] +=
alpha * (new_erle[subband] - erle_estimators_[ch][idx][subband]);
erle_estimators_[ch][idx][subband] = rtc::SafeClamp(
erle_estimators_[ch][idx][subband], min_erle_, max_erle_[subband]);
}
for (size_t subband = 0; subband < kSubbands; ++subband) {
float alpha = new_erle[subband] > erle_ref_[ch][subband]
? kSmthConstantIncreases
: kSmthConstantDecreases;
alpha = static_cast<float>(is_erle_updated[subband]) * alpha;
erle_ref_[ch][subband] +=
alpha * (new_erle[subband] - erle_ref_[ch][subband]);
erle_ref_[ch][subband] = rtc::SafeClamp(erle_ref_[ch][subband],
min_erle_, max_erle_[subband]);
}
for (size_t subband = 0; subband < kSubbands; ++subband) {
constexpr int kNumUpdateThr = 50;
if (is_erle_updated[subband] &&
num_updates_[ch][subband] > kNumUpdateThr) {
const size_t idx = idx_subbands[subband];
RTC_DCHECK_GT(erle_ref_[ch][subband], 0.f);
// Computes the ratio between the erle that is updated using all the
// points and the erle that is updated only on signals that share the
// same number of active filter sections.
float new_correction_factor =
erle_estimators_[ch][idx][subband] / erle_ref_[ch][subband];
correction_factors_[ch][idx][subband] +=
0.1f *
(new_correction_factor - correction_factors_[ch][idx][subband]);
}
}
}
}
}
void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection(
const RenderBuffer& render_buffer,
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
filter_frequency_responses) {
const SpectrumBuffer& spectrum_render_buffer =
render_buffer.GetSpectrumBuffer();
const size_t num_render_channels = spectrum_render_buffer.buffer[0].size();
const size_t num_capture_channels = S2_section_accum_.size();
const float one_by_num_render_channels = 1.f / num_render_channels;
RTC_DCHECK_EQ(S2_section_accum_.size(), filter_frequency_responses.size());
for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) {
RTC_DCHECK_EQ(S2_section_accum_[capture_ch].size() + 1,
section_boundaries_blocks_.size());
size_t idx_render = render_buffer.Position();
idx_render = spectrum_render_buffer.OffsetIndex(
idx_render, section_boundaries_blocks_[0]);
for (size_t section = 0; section < num_sections_; ++section) {
std::array<float, kFftLengthBy2Plus1> X2_section;
std::array<float, kFftLengthBy2Plus1> H2_section;
X2_section.fill(0.f);
H2_section.fill(0.f);
const size_t block_limit =
std::min(section_boundaries_blocks_[section + 1],
filter_frequency_responses[capture_ch].size());
for (size_t block = section_boundaries_blocks_[section];
block < block_limit; ++block) {
for (size_t render_ch = 0;
render_ch < spectrum_render_buffer.buffer[idx_render].size();
++render_ch) {
for (size_t k = 0; k < X2_section.size(); ++k) {
X2_section[k] +=
spectrum_render_buffer.buffer[idx_render][render_ch][k] *
one_by_num_render_channels;
}
}
std::transform(H2_section.begin(), H2_section.end(),
filter_frequency_responses[capture_ch][block].begin(),
H2_section.begin(), std::plus<float>());
idx_render = spectrum_render_buffer.IncIndex(idx_render);
}
std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(),
S2_section_accum_[capture_ch][section].begin(),
std::multiplies<float>());
}
for (size_t section = 1; section < num_sections_; ++section) {
std::transform(S2_section_accum_[capture_ch][section - 1].begin(),
S2_section_accum_[capture_ch][section - 1].end(),
S2_section_accum_[capture_ch][section].begin(),
S2_section_accum_[capture_ch][section].begin(),
std::plus<float>());
}
}
}
void SignalDependentErleEstimator::ComputeActiveFilterSections() {
for (size_t ch = 0; ch < n_active_sections_.size(); ++ch) {
std::fill(n_active_sections_[ch].begin(), n_active_sections_[ch].end(), 0);
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
size_t section = num_sections_;
float target = 0.9f * S2_section_accum_[ch][num_sections_ - 1][k];
while (section > 0 && S2_section_accum_[ch][section - 1][k] >= target) {
n_active_sections_[ch][k] = --section;
}
}
}
}
} // namespace webrtc