| /* |
| * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "modules/audio_processing/aec3/transparent_mode.h" |
| |
| #include "rtc_base/checks.h" |
| #include "rtc_base/logging.h" |
| #include "system_wrappers/include/field_trial.h" |
| |
| namespace webrtc { |
| namespace { |
| |
// Start value of the legacy classifier's count of consecutive blocks without
// a converged filter.
constexpr size_t kBlocksSinceConvergencedFilterInit{10000};
// Start value of the legacy classifier's count of active render blocks since
// a consistent filter estimate was last observed.
constexpr size_t kBlocksSinceConsistentEstimateInit{10000};
// Probability assigned to the transparent state before any observation has
// been made by the HMM classifier.
constexpr float kInitialTransparentStateProbability{0.2f};
| |
| bool DeactivateTransparentMode() { |
| return field_trial::IsEnabled("WebRTC-Aec3TransparentModeKillSwitch"); |
| } |
| |
| bool ActivateTransparentModeHmm() { |
| return field_trial::IsEnabled("WebRTC-Aec3TransparentModeHmm"); |
| } |
| |
| } // namespace |
| |
| // Classifier that toggles transparent mode which reduces echo suppression when |
| // headsets are used. |
| class TransparentModeImpl : public TransparentMode { |
| public: |
| bool Active() const override { return transparency_activated_; } |
| |
| void Reset() override { |
| // Determines if transparent mode is used. |
| transparency_activated_ = false; |
| |
| // The estimated probability of being transparent mode. |
| prob_transparent_state_ = kInitialTransparentStateProbability; |
| } |
| |
| void Update(int /* filter_delay_blocks */, |
| bool /* any_filter_consistent */, |
| bool /* any_filter_converged */, |
| bool any_coarse_filter_converged, |
| bool /* all_filters_diverged */, |
| bool active_render, |
| bool /* saturated_capture */) override { |
| // The classifier is implemented as a Hidden Markov Model (HMM) with two |
| // hidden states: "normal" and "transparent". The estimated probabilities of |
| // the two states are updated by observing filter convergence during active |
| // render. The filters are less likely to be reported as converged when |
| // there is no echo present in the microphone signal. |
| |
| // The constants have been obtained by observing active_render and |
| // any_coarse_filter_converged under varying call scenarios. They |
| // have further been hand tuned to prefer normal state during uncertain |
| // regions (to avoid echo leaks). |
| |
| // The model is only updated during active render. |
| if (!active_render) |
| return; |
| |
| // Probability of switching from one state to the other. |
| constexpr float kSwitch = 0.000001f; |
| |
| // Probability of observing converged filters in states "normal" and |
| // "transparent" during active render. |
| constexpr float kConvergedNormal = 0.01f; |
| constexpr float kConvergedTransparent = 0.001f; |
| |
| // Probability of transitioning to transparent state from normal state and |
| // transparent state respectively. |
| constexpr float kA[2] = {kSwitch, 1.f - kSwitch}; |
| |
| // Probability of the two observations (converged filter or not converged |
| // filter) in normal state and transparent state respectively. |
| constexpr float kB[2][2] = { |
| {1.f - kConvergedNormal, kConvergedNormal}, |
| {1.f - kConvergedTransparent, kConvergedTransparent}}; |
| |
| // Probability of the two states before the update. |
| const float prob_transparent = prob_transparent_state_; |
| const float prob_normal = 1.f - prob_transparent; |
| |
| // Probability of transitioning to transparent state. |
| const float prob_transition_transparent = |
| prob_normal * kA[0] + prob_transparent * kA[1]; |
| const float prob_transition_normal = 1.f - prob_transition_transparent; |
| |
| // Observed output. |
| const int out = static_cast<int>(any_coarse_filter_converged); |
| |
| // Joint probabilites of the observed output and respective states. |
| const float prob_joint_normal = prob_transition_normal * kB[0][out]; |
| const float prob_joint_transparent = |
| prob_transition_transparent * kB[1][out]; |
| |
| // Conditional probability of transparent state and the observed output. |
| RTC_DCHECK_GT(prob_joint_normal + prob_joint_transparent, 0.f); |
| prob_transparent_state_ = |
| prob_joint_transparent / (prob_joint_normal + prob_joint_transparent); |
| |
| // Transparent mode is only activated when its state probability is high. |
| // Dead zone between activation/deactivation thresholds to avoid switching |
| // back and forth. |
| if (prob_transparent_state_ > 0.95f) { |
| transparency_activated_ = true; |
| } else if (prob_transparent_state_ < 0.5f) { |
| transparency_activated_ = false; |
| } |
| } |
| |
| private: |
| bool transparency_activated_ = false; |
| float prob_transparent_state_ = kInitialTransparentStateProbability; |
| }; |
| |
| // Legacy classifier for toggling transparent mode. |
| class LegacyTransparentModeImpl : public TransparentMode { |
| public: |
| explicit LegacyTransparentModeImpl(const EchoCanceller3Config& config) |
| : linear_and_stable_echo_path_( |
| config.echo_removal_control.linear_and_stable_echo_path), |
| active_blocks_since_sane_filter_(kBlocksSinceConsistentEstimateInit), |
| non_converged_sequence_size_(kBlocksSinceConvergencedFilterInit) {} |
| |
| bool Active() const override { return transparency_activated_; } |
| |
| void Reset() override { |
| non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; |
| diverged_sequence_size_ = 0; |
| strong_not_saturated_render_blocks_ = 0; |
| if (linear_and_stable_echo_path_) { |
| recent_convergence_during_activity_ = false; |
| } |
| } |
| |
| void Update(int filter_delay_blocks, |
| bool any_filter_consistent, |
| bool any_filter_converged, |
| bool /* any_coarse_filter_converged */, |
| bool all_filters_diverged, |
| bool active_render, |
| bool saturated_capture) override { |
| ++capture_block_counter_; |
| strong_not_saturated_render_blocks_ += |
| active_render && !saturated_capture ? 1 : 0; |
| |
| if (any_filter_consistent && filter_delay_blocks < 5) { |
| sane_filter_observed_ = true; |
| active_blocks_since_sane_filter_ = 0; |
| } else if (active_render) { |
| ++active_blocks_since_sane_filter_; |
| } |
| |
| bool sane_filter_recently_seen; |
| if (!sane_filter_observed_) { |
| sane_filter_recently_seen = |
| capture_block_counter_ <= 5 * kNumBlocksPerSecond; |
| } else { |
| sane_filter_recently_seen = |
| active_blocks_since_sane_filter_ <= 30 * kNumBlocksPerSecond; |
| } |
| |
| if (any_filter_converged) { |
| recent_convergence_during_activity_ = true; |
| active_non_converged_sequence_size_ = 0; |
| non_converged_sequence_size_ = 0; |
| ++num_converged_blocks_; |
| } else { |
| if (++non_converged_sequence_size_ > 20 * kNumBlocksPerSecond) { |
| num_converged_blocks_ = 0; |
| } |
| |
| if (active_render && |
| ++active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { |
| recent_convergence_during_activity_ = false; |
| } |
| } |
| |
| if (!all_filters_diverged) { |
| diverged_sequence_size_ = 0; |
| } else if (++diverged_sequence_size_ >= 60) { |
| // TODO(peah): Change these lines to ensure proper triggering of usable |
| // filter. |
| non_converged_sequence_size_ = kBlocksSinceConvergencedFilterInit; |
| } |
| |
| if (active_non_converged_sequence_size_ > 60 * kNumBlocksPerSecond) { |
| finite_erl_recently_detected_ = false; |
| } |
| if (num_converged_blocks_ > 50) { |
| finite_erl_recently_detected_ = true; |
| } |
| |
| if (finite_erl_recently_detected_) { |
| transparency_activated_ = false; |
| } else if (sane_filter_recently_seen && |
| recent_convergence_during_activity_) { |
| transparency_activated_ = false; |
| } else { |
| const bool filter_should_have_converged = |
| strong_not_saturated_render_blocks_ > 6 * kNumBlocksPerSecond; |
| transparency_activated_ = filter_should_have_converged; |
| } |
| } |
| |
| private: |
| const bool linear_and_stable_echo_path_; |
| size_t capture_block_counter_ = 0; |
| bool transparency_activated_ = false; |
| size_t active_blocks_since_sane_filter_; |
| bool sane_filter_observed_ = false; |
| bool finite_erl_recently_detected_ = false; |
| size_t non_converged_sequence_size_; |
| size_t diverged_sequence_size_ = 0; |
| size_t active_non_converged_sequence_size_ = 0; |
| size_t num_converged_blocks_ = 0; |
| bool recent_convergence_during_activity_ = false; |
| size_t strong_not_saturated_render_blocks_ = 0; |
| }; |
| |
| std::unique_ptr<TransparentMode> TransparentMode::Create( |
| const EchoCanceller3Config& config) { |
| if (config.ep_strength.bounded_erl || DeactivateTransparentMode()) { |
| RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Disabled"; |
| return nullptr; |
| } |
| if (ActivateTransparentModeHmm()) { |
| RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: HMM"; |
| return std::make_unique<TransparentModeImpl>(); |
| } |
| RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Legacy"; |
| return std::make_unique<LegacyTransparentModeImpl>(config); |
| } |
| |
| } // namespace webrtc |