| /* |
| * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/audio_processing/aec3/aec_state.h" |
| |
| #include <math.h> |
| |
| #include <algorithm> |
| #include <numeric> |
| #include <vector> |
| |
| #include "absl/types/optional.h" |
| #include "api/array_view.h" |
| #include "modules/audio_processing/aec3/aec3_common.h" |
| #include "modules/audio_processing/logging/apm_data_dumper.h" |
| #include "rtc_base/atomic_ops.h" |
| #include "rtc_base/checks.h" |
| |
| namespace webrtc { |
| namespace { |
| |
| constexpr size_t kBlocksSinceConvergencedFilterInit = 10000; |
| constexpr size_t kBlocksSinceConsistentEstimateInit = 10000; |
| |
| } // namespace |
| |
| int AecState::instance_count_ = 0; |
| |
| void AecState::GetResidualEchoScaling( |
| rtc::ArrayView<float> residual_scaling) const { |
| bool filter_has_had_time_to_converge; |
| if (config_.filter.conservative_initial_phase) { |
| filter_has_had_time_to_converge = |
| strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond; |
| } else { |
| filter_has_had_time_to_converge = |
| strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond; |
| } |
| echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge, |
| residual_scaling); |
| } |
| |
| absl::optional<float> AecState::ErleUncertainty() const { |
| if (SaturatedEcho()) { |
| return 1.f; |
| } |
| |
| return absl::nullopt; |
| } |
| |
| AecState::AecState(const EchoCanceller3Config& config) |
| : data_dumper_( |
| new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), |
| config_(config), |
| initial_state_(config_), |
| delay_state_(config_), |
| transparent_state_(config_), |
| filter_quality_state_(config_), |
| legacy_filter_quality_state_(config_), |
| legacy_saturation_detector_(config_), |
| erl_estimator_(2 * kNumBlocksPerSecond), |
| erle_estimator_(2 * kNumBlocksPerSecond, config_), |
| filter_analyzer_(config_), |
| echo_audibility_( |
| config_.echo_audibility.use_stationarity_properties_at_init), |
| reverb_model_estimator_(config_) {} |
| |
| AecState::~AecState() = default; |
| |
| void AecState::HandleEchoPathChange( |
| const EchoPathVariability& echo_path_variability) { |
| const auto full_reset = [&]() { |
| filter_analyzer_.Reset(); |
| capture_signal_saturation_ = false; |
| strong_not_saturated_render_blocks_ = 0; |
| blocks_with_active_render_ = 0; |
| initial_state_.Reset(); |
| transparent_state_.Reset(); |
| legacy_saturation_detector_.Reset(); |
| erle_estimator_.Reset(true); |
| erl_estimator_.Reset(); |
| filter_quality_state_.Reset(); |
| }; |
| |
| // TODO(peah): Refine the reset scheme according to the type of gain and |
| // delay adjustment. |
| |
| if (echo_path_variability.delay_change != |
| EchoPathVariability::DelayAdjustment::kNone) { |
| full_reset(); |
| } else if (echo_path_variability.gain_change) { |
| erle_estimator_.Reset(false); |
| } |
| subtractor_output_analyzer_.HandleEchoPathChange(); |
| } |
| |
| void AecState::Update( |
| const absl::optional<DelayEstimate>& external_delay, |
| const std::vector<std::array<float, kFftLengthBy2Plus1>>& |
| adaptive_filter_frequency_response, |
| const std::vector<float>& adaptive_filter_impulse_response, |
| const RenderBuffer& render_buffer, |
| const std::array<float, kFftLengthBy2Plus1>& E2_main, |
| const std::array<float, kFftLengthBy2Plus1>& Y2, |
| const SubtractorOutput& subtractor_output, |
| rtc::ArrayView<const float> y) { |
| // Analyze the filter output. |
| subtractor_output_analyzer_.Update(subtractor_output); |
| |
| // Analyze the properties of the filter. |
| filter_analyzer_.Update(adaptive_filter_impulse_response, render_buffer); |
| |
| // Estimate the direct path delay of the filter. |
| if (config_.filter.use_linear_filter) { |
| delay_state_.Update(filter_analyzer_, external_delay, |
| strong_not_saturated_render_blocks_); |
| } |
| |
| const std::vector<float>& aligned_render_block = |
| render_buffer.Block(-delay_state_.DirectPathFilterDelay())[0]; |
| |
| // Update render counters. |
| const float render_energy = std::inner_product( |
| aligned_render_block.begin(), aligned_render_block.end(), |
| aligned_render_block.begin(), 0.f); |
| const bool active_render = |
| render_energy > (config_.render_levels.active_render_limit * |
| config_.render_levels.active_render_limit) * |
| kFftLengthBy2; |
| blocks_with_active_render_ += active_render ? 1 : 0; |
| strong_not_saturated_render_blocks_ += |
| active_render && !SaturatedCapture() ? 1 : 0; |
| |
| std::array<float, kFftLengthBy2Plus1> X2_reverb; |
| render_reverb_.Apply( |
| render_buffer.GetSpectrumBuffer(), delay_state_.DirectPathFilterDelay(), |
| config_.ep_strength.reverb_based_on_render ? ReverbDecay() : 0.f, |
| X2_reverb); |
| |
| if (config_.echo_audibility.use_stationarity_properties) { |
| // Update the echo audibility evaluator. |
| echo_audibility_.Update(render_buffer, |
| render_reverb_.GetReverbContributionPowerSpectrum(), |
| delay_state_.DirectPathFilterDelay(), |
| delay_state_.ExternalDelayReported()); |
| } |
| |
| // Update the ERL and ERLE measures. |
| if (initial_state_.TransitionTriggered()) { |
| erle_estimator_.Reset(false); |
| } |
| |
| const auto& X2 = render_buffer.Spectrum(delay_state_.DirectPathFilterDelay()); |
| const auto& X2_input_erle = X2_reverb; |
| |
| erle_estimator_.Update(render_buffer, adaptive_filter_frequency_response, |
| X2_input_erle, Y2, E2_main, |
| subtractor_output_analyzer_.ConvergedFilter(), |
| config_.erle.onset_detection); |
| |
| erl_estimator_.Update(subtractor_output_analyzer_.ConvergedFilter(), X2, Y2); |
| |
| // Detect and flag echo saturation. |
| saturation_detector_.Update(aligned_render_block, SaturatedCapture(), |
| UsableLinearEstimate(), subtractor_output, |
| EchoPathGain()); |
| |
| // Update the decision on whether to use the initial state parameter set. |
| initial_state_.Update(active_render, SaturatedCapture()); |
| |
| // Detect whether the transparent mode should be activated. |
| transparent_state_.Update(delay_state_.DirectPathFilterDelay(), |
| filter_analyzer_.Consistent(), |
| subtractor_output_analyzer_.ConvergedFilter(), |
| subtractor_output_analyzer_.DivergedFilter(), |
| active_render, SaturatedCapture()); |
| |
| // Analyze the quality of the filter. |
| filter_quality_state_.Update(active_render, TransparentMode(), |
| SaturatedCapture(), |
| filter_analyzer_.Consistent(), external_delay, |
| subtractor_output_analyzer_.ConvergedFilter()); |
| |
| // Update the reverb estimate. |
| const bool stationary_block = |
| config_.echo_audibility.use_stationarity_properties && |
| echo_audibility_.IsBlockStationary(); |
| |
| reverb_model_estimator_.Update(filter_analyzer_.GetAdjustedFilter(), |
| adaptive_filter_frequency_response, |
| erle_estimator_.GetInstLinearQualityEstimate(), |
| delay_state_.DirectPathFilterDelay(), |
| UsableLinearEstimate(), stationary_block); |
| |
| erle_estimator_.Dump(data_dumper_); |
| reverb_model_estimator_.Dump(data_dumper_.get()); |
| data_dumper_->DumpRaw("aec3_erl", Erl()); |
| data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain()); |
| data_dumper_->DumpRaw("aec3_erle", Erle()); |
| data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate()); |
| data_dumper_->DumpRaw("aec3_transparent_mode", TransparentMode()); |
| data_dumper_->DumpRaw("aec3_filter_delay", filter_analyzer_.DelayBlocks()); |
| |
| data_dumper_->DumpRaw("aec3_consistent_filter", |
| filter_analyzer_.Consistent()); |
| data_dumper_->DumpRaw("aec3_initial_state", |
| initial_state_.InitialStateActive()); |
| data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture()); |
| data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho()); |
| data_dumper_->DumpRaw("aec3_converged_filter", |
| subtractor_output_analyzer_.ConvergedFilter()); |
| data_dumper_->DumpRaw("aec3_diverged_filter", |
| subtractor_output_analyzer_.DivergedFilter()); |
| |
| data_dumper_->DumpRaw("aec3_external_delay_avaliable", |
| external_delay ? 1 : 0); |
| data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est", |
| GetReverbFrequencyResponse()); |
| } |
| |
| AecState::InitialState::InitialState(const EchoCanceller3Config& config) |
| : conservative_initial_phase_(config.filter.conservative_initial_phase), |
| initial_state_seconds_(config.filter.initial_state_seconds) { |
| Reset(); |
| } |
| void AecState::InitialState::InitialState::Reset() { |
| initial_state_ = true; |
| strong_not_saturated_render_blocks_ = 0; |
| } |
| void AecState::InitialState::InitialState::Update(bool active_render, |
| bool saturated_capture) { |
| strong_not_saturated_render_blocks_ += |
| active_render && !saturated_capture ? 1 : 0; |
| |
| // Flag whether the initial state is still active. |
| bool prev_initial_state = initial_state_; |
| if (conservative_initial_phase_) { |
| initial_state_ = |
| strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond; |
| } else { |
| initial_state_ = strong_not_saturated_render_blocks_ < |
| initial_state_seconds_ * kNumBlocksPerSecond; |
| } |
| |
| // Flag whether the transition from the initial state has started. |
| transition_triggered_ = !initial_state_ && prev_initial_state; |
| } |
| |
| AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config) |
| : delay_headroom_samples_(config.delay.delay_headroom_samples) {} |
| |
| void AecState::FilterDelay::Update( |
| const FilterAnalyzer& filter_analyzer, |
| const absl::optional<DelayEstimate>& external_delay, |
| size_t blocks_with_proper_filter_adaptation) { |
| // Update the delay based on the external delay. |
| if (external_delay && |
| (!external_delay_ || external_delay_->delay != external_delay->delay)) { |
| external_delay_ = external_delay; |
| external_delay_reported_ = true; |
| } |
| |
| // Override the estimated delay if it is not certain that the filter has had |
| // time to converge. |
| const bool delay_estimator_may_not_have_converged = |
| blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond; |
| if (delay_estimator_may_not_have_converged && external_delay_) { |
| filter_delay_blocks_ = delay_headroom_samples_ / kBlockSize; |
| } else { |
| filter_delay_blocks_ = filter_analyzer.DelayBlocks(); |
| } |
| } |
| |
| AecState::TransparentMode::TransparentMode(const EchoCanceller3Config& config) |
| : bounded_erl_(config.ep_strength.bounded_erl), |
| linear_and_stable_echo_path_( |
| config.echo_removal_control.linear_and_stable_echo_path), |
| active_blocks_since_sane_filter_(kBlocksSinceConsistentEstimateInit), |
| non_converged_sequence_size_(kBlocksSinceConvergencedFilterInit) {} |
| |
| void AecState::TransparentMode::Reset() { |
| non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; |
| diverged_sequence_size_ = 0; |
| strong_not_saturated_render_blocks_ = 0; |
| if (linear_and_stable_echo_path_) { |
| recent_convergence_during_activity_ = false; |
| } |
| } |
| |
| void AecState::TransparentMode::Update(int filter_delay_blocks, |
| bool consistent_filter, |
| bool converged_filter, |
| bool diverged_filter, |
| bool active_render, |
| bool saturated_capture) { |
| ++capture_block_counter_; |
| strong_not_saturated_render_blocks_ += |
| active_render && !saturated_capture ? 1 : 0; |
| |
| if (consistent_filter && filter_delay_blocks < 5) { |
| sane_filter_observed_ = true; |
| active_blocks_since_sane_filter_ = 0; |
| } else if (active_render) { |
| ++active_blocks_since_sane_filter_; |
| } |
| |
| bool sane_filter_recently_seen; |
| if (!sane_filter_observed_) { |
| sane_filter_recently_seen = |
| capture_block_counter_ <= 5 * kNumBlocksPerSecond; |
| } else { |
| sane_filter_recently_seen = |
| active_blocks_since_sane_filter_ <= 30 * kNumBlocksPerSecond; |
| } |
| |
| if (converged_filter) { |
| recent_convergence_during_activity_ = true; |
| active_non_converged_sequence_size_ = 0; |
| non_converged_sequence_size_ = 0; |
| ++num_converged_blocks_; |
| } else { |
| if (++non_converged_sequence_size_ > 20 * kNumBlocksPerSecond) { |
| num_converged_blocks_ = 0; |
| } |
| |
| if (active_render && |
| ++active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { |
| recent_convergence_during_activity_ = false; |
| } |
| } |
| |
| if (!diverged_filter) { |
| diverged_sequence_size_ = 0; |
| } else if (++diverged_sequence_size_ >= 60) { |
| // TODO(peah): Change these lines to ensure proper triggering of usable |
| // filter. |
| non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; |
| } |
| |
| if (active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { |
| finite_erl_recently_detected_ = false; |
| } |
| if (num_converged_blocks_ > 50) { |
| finite_erl_recently_detected_ = true; |
| } |
| |
| if (bounded_erl_) { |
| transparency_activated_ = false; |
| } else if (finite_erl_recently_detected_) { |
| transparency_activated_ = false; |
| } else if (sane_filter_recently_seen && recent_convergence_during_activity_) { |
| transparency_activated_ = false; |
| } else { |
| const bool filter_should_have_converged = |
| strong_not_saturated_render_blocks_ > 6 * kNumBlocksPerSecond; |
| transparency_activated_ = filter_should_have_converged; |
| } |
| } |
| |
| AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer( |
| const EchoCanceller3Config& config) {} |
| |
| void AecState::FilteringQualityAnalyzer::Reset() { |
| usable_linear_estimate_ = false; |
| filter_update_blocks_since_reset_ = 0; |
| } |
| |
| void AecState::FilteringQualityAnalyzer::Update( |
| bool active_render, |
| bool transparent_mode, |
| bool saturated_capture, |
| bool consistent_estimate_, |
| const absl::optional<DelayEstimate>& external_delay, |
| bool converged_filter) { |
| // Update blocks counter. |
| const bool filter_update = active_render && !saturated_capture; |
| filter_update_blocks_since_reset_ += filter_update ? 1 : 0; |
| filter_update_blocks_since_start_ += filter_update ? 1 : 0; |
| |
| // Store convergence flag when observed. |
| convergence_seen_ = convergence_seen_ || converged_filter; |
| |
| // Verify requirements for achieving a decent filter. The requirements for |
| // filter adaptation at call startup are more restrictive than after an |
| // in-call reset. |
| const bool sufficient_data_to_converge_at_startup = |
| filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f; |
| const bool sufficient_data_to_converge_at_reset = |
| sufficient_data_to_converge_at_startup && |
| filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f; |
| |
| // The linear filter can only be used it has had time to converge. |
| usable_linear_estimate_ = sufficient_data_to_converge_at_startup && |
| sufficient_data_to_converge_at_reset; |
| |
| // The linear filter can only be used if an external delay or convergence have |
| // been identified |
| usable_linear_estimate_ = |
| usable_linear_estimate_ && (external_delay || convergence_seen_); |
| |
| // If transparent mode is on, deactivate usign the linear filter. |
| usable_linear_estimate_ = usable_linear_estimate_ && !transparent_mode; |
| } |
| |
| AecState::LegacyFilteringQualityAnalyzer::LegacyFilteringQualityAnalyzer( |
| const EchoCanceller3Config& config) |
| : conservative_initial_phase_(config.filter.conservative_initial_phase), |
| required_blocks_for_convergence_( |
| kNumBlocksPerSecond * (conservative_initial_phase_ ? 1.5f : 0.8f)), |
| linear_and_stable_echo_path_( |
| config.echo_removal_control.linear_and_stable_echo_path), |
| non_converged_sequence_size_(kBlocksSinceConvergencedFilterInit) {} |
| |
| void AecState::LegacyFilteringQualityAnalyzer::Reset() { |
| usable_linear_estimate_ = false; |
| strong_not_saturated_render_blocks_ = 0; |
| if (linear_and_stable_echo_path_) { |
| recent_convergence_during_activity_ = false; |
| } |
| diverged_sequence_size_ = 0; |
| // TODO(peah): Change to ensure proper triggering of usable filter. |
| non_converged_sequence_size_ = 10000; |
| recent_convergence_ = true; |
| } |
| |
| void AecState::LegacyFilteringQualityAnalyzer::Update( |
| bool saturated_echo, |
| bool active_render, |
| bool saturated_capture, |
| bool transparent_mode, |
| const absl::optional<DelayEstimate>& external_delay, |
| bool converged_filter, |
| bool diverged_filter) { |
| diverged_sequence_size_ = diverged_filter ? diverged_sequence_size_ + 1 : 0; |
| if (diverged_sequence_size_ >= 60) { |
| // TODO(peah): Change these lines to ensure proper triggering of usable |
| // filter. |
| non_converged_sequence_size_ = 10000; |
| recent_convergence_ = true; |
| } |
| |
| if (converged_filter) { |
| non_converged_sequence_size_ = 0; |
| recent_convergence_ = true; |
| active_non_converged_sequence_size_ = 0; |
| recent_convergence_during_activity_ = true; |
| } else { |
| if (++non_converged_sequence_size_ >= 60 * kNumBlocksPerSecond) { |
| recent_convergence_ = false; |
| } |
| |
| if (active_render && |
| ++active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { |
| recent_convergence_during_activity_ = false; |
| } |
| } |
| |
| strong_not_saturated_render_blocks_ += |
| active_render && !saturated_capture ? 1 : 0; |
| const bool filter_has_had_time_to_converge = |
| strong_not_saturated_render_blocks_ > required_blocks_for_convergence_; |
| |
| usable_linear_estimate_ = filter_has_had_time_to_converge && external_delay; |
| |
| if (!conservative_initial_phase_ && recent_convergence_during_activity_) { |
| usable_linear_estimate_ = true; |
| } |
| |
| if (!linear_and_stable_echo_path_ && !recent_convergence_) { |
| usable_linear_estimate_ = false; |
| } |
| |
| if (saturated_echo || transparent_mode) { |
| usable_linear_estimate_ = false; |
| } |
| } |
| |
| void AecState::SaturationDetector::Update( |
| rtc::ArrayView<const float> x, |
| bool saturated_capture, |
| bool usable_linear_estimate, |
| const SubtractorOutput& subtractor_output, |
| float echo_path_gain) { |
| saturated_echo_ = saturated_capture; |
| if (usable_linear_estimate) { |
| constexpr float kSaturationThreshold = 20000.f; |
| saturated_echo_ = |
| saturated_echo_ && |
| (subtractor_output.s_main_max_abs > kSaturationThreshold || |
| subtractor_output.s_shadow_max_abs > kSaturationThreshold); |
| } else { |
| const float max_sample = fabs(*std::max_element( |
| x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); |
| |
| const float kMargin = 10.f; |
| float peak_echo_amplitude = max_sample * echo_path_gain * kMargin; |
| saturated_echo_ = saturated_echo_ && peak_echo_amplitude > 32000; |
| } |
| } |
| |
| AecState::LegacySaturationDetector::LegacySaturationDetector( |
| const EchoCanceller3Config& config) |
| : echo_can_saturate_(config.ep_strength.echo_can_saturate), |
| not_saturated_sequence_size_(1000) {} |
| |
| void AecState::LegacySaturationDetector::Reset() { |
| not_saturated_sequence_size_ = 0; |
| } |
| |
| void AecState::LegacySaturationDetector::Update(rtc::ArrayView<const float> x, |
| bool saturated_capture, |
| float echo_path_gain) { |
| if (!echo_can_saturate_) { |
| saturated_echo_ = false; |
| return; |
| } |
| |
| RTC_DCHECK_LT(0, x.size()); |
| if (saturated_capture) { |
| const float max_sample = fabs(*std::max_element( |
| x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); |
| |
| // Set flag for potential presence of saturated echo |
| const float kMargin = 10.f; |
| float peak_echo_amplitude = max_sample * echo_path_gain * kMargin; |
| if (peak_echo_amplitude > 32000) { |
| not_saturated_sequence_size_ = 0; |
| saturated_echo_ = true; |
| return; |
| } |
| } |
| |
| saturated_echo_ = ++not_saturated_sequence_size_ < 5; |
| } |
| |
| } // namespace webrtc |