Reduce complexity in the APM pipeline when the output is not used. This CL selectively turns off parts of the audio processing when the output of APM is not used. The parts turned off are those that do not need to be continuously trained, but rather can be temporarily deactivated. The purpose of this CL is to allow CPU usage to be reduced when the client is muted. The CL will be followed by additional CLs adding similar functionality in the echo canceller and the noise suppressor. Bug: b/177830919 Change-Id: I72d24505197a53872562c0955f3e7b670c43df6b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/209703 Commit-Queue: Per Åhgren <peah@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/master@{#33431}

commit: aa6adffba325f4b698a1e94aeab020bfdc47adec [log] [tgz]
author: Per Åhgren <peah@webrtc.org> Thu Mar 11 08:57:07 2021
committer: Commit Bot <commit-bot@chromium.org> Thu Mar 11 10:06:58 2021
tree: 9087c5b64e513d40f9b189153c2db43dd17e8737
parent: 54dbc3be3fdbb77554d47a4bcede94f6cb8ad602 [diff]
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 79a3151..93dc080 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc

@@ -115,6 +115,10 @@
   RTC_CHECK_NOTREACHED();
 }
 
+bool MinimizeProcessingForUnusedOutput() {
+  return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
+}
+
 // Maximum lengths that frame of samples being passed from the render side to
 // the capture side can have (does not apply to AEC3).
 static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
@@ -267,7 +271,9 @@
                      "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
                  !field_trial::IsEnabled(
                      "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
-                 EnforceSplitBandHpf()),
+                 EnforceSplitBandHpf(),
+                 MinimizeProcessingForUnusedOutput()),
+      capture_(),
       capture_nonlocked_() {
   RTC_LOG(LS_INFO) << "Injected APM submodules:"
                       "\nEcho control factory: "
@@ -667,7 +673,9 @@
 
 void AudioProcessingImpl::HandleCaptureOutputUsedSetting(
     bool capture_output_used) {
-  capture_.capture_output_used = capture_output_used;
+  capture_.capture_output_used =
+      capture_output_used || !constants_.minimize_processing_for_unused_output;
+
   if (submodules_.agc_manager.get()) {
     submodules_.agc_manager->HandleCaptureOutputUsedChange(
         capture_.capture_output_used);
@@ -874,11 +882,7 @@
 void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
   // Fall back to a safe state for the case when a setting for capture output
   // usage setting has been missed.
-  capture_.capture_output_used = true;
-  if (submodules_.echo_controller) {
-    submodules_.echo_controller->SetCaptureOutputUsage(
-        capture_.capture_output_used);
-  }
+  HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
 }
 
 void AudioProcessingImpl::HandleRenderRuntimeSettings() {
@@ -1226,87 +1230,101 @@
         capture_buffer, /*stream_has_echo*/ false));
   }
 
-  if (submodule_states_.CaptureMultiBandProcessingPresent() &&
-      SampleRateSupportsMultiBand(
-          capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
-    capture_buffer->MergeFrequencyBands();
-  }
-
-  if (capture_.capture_fullband_audio) {
-    const auto& ec = submodules_.echo_controller;
-    bool ec_active = ec ? ec->ActiveProcessing() : false;
-    // Only update the fullband buffer if the multiband processing has changed
-    // the signal. Keep the original signal otherwise.
-    if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
-      capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
+  capture_.stats.output_rms_dbfs = absl::nullopt;
+  if (capture_.capture_output_used) {
+    if (submodule_states_.CaptureMultiBandProcessingPresent() &&
+        SampleRateSupportsMultiBand(
+            capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
+      capture_buffer->MergeFrequencyBands();
     }
-    capture_buffer = capture_.capture_fullband_audio.get();
+
+    if (capture_.capture_fullband_audio) {
+      const auto& ec = submodules_.echo_controller;
+      bool ec_active = ec ? ec->ActiveProcessing() : false;
+      // Only update the fullband buffer if the multiband processing has changed
+      // the signal. Keep the original signal otherwise.
+      if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
+        capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
+      }
+      capture_buffer = capture_.capture_fullband_audio.get();
+    }
+
+    if (config_.residual_echo_detector.enabled) {
+      RTC_DCHECK(submodules_.echo_detector);
+      submodules_.echo_detector->AnalyzeCaptureAudio(
+          rtc::ArrayView<const float>(capture_buffer->channels()[0],
+                                      capture_buffer->num_frames()));
+    }
+
+    // TODO(aluebs): Investigate if the transient suppression placement should
+    // be before or after the AGC.
+    if (submodules_.transient_suppressor) {
+      float voice_probability =
+          submodules_.agc_manager.get()
+              ? submodules_.agc_manager->voice_probability()
+              : 1.f;
+
+      submodules_.transient_suppressor->Suppress(
+          capture_buffer->channels()[0], capture_buffer->num_frames(),
+          capture_buffer->num_channels(),
+          capture_buffer->split_bands_const(0)[kBand0To8kHz],
+          capture_buffer->num_frames_per_band(),
+          capture_.keyboard_info.keyboard_data,
+          capture_.keyboard_info.num_keyboard_frames, voice_probability,
+          capture_.key_pressed);
+    }
+
+    // Experimental APM sub-module that analyzes |capture_buffer|.
+    if (submodules_.capture_analyzer) {
+      submodules_.capture_analyzer->Analyze(capture_buffer);
+    }
+
+    if (submodules_.gain_controller2) {
+      submodules_.gain_controller2->NotifyAnalogLevel(
+          recommended_stream_analog_level_locked());
+      submodules_.gain_controller2->Process(capture_buffer);
+    }
+
+    if (submodules_.capture_post_processor) {
+      submodules_.capture_post_processor->Process(capture_buffer);
+    }
+
+    // The level estimator operates on the recombined data.
+    if (config_.level_estimation.enabled) {
+      submodules_.output_level_estimator->ProcessStream(*capture_buffer);
+      capture_.stats.output_rms_dbfs =
+          submodules_.output_level_estimator->RMS();
+    }
+
+    capture_output_rms_.Analyze(rtc::ArrayView<const float>(
+        capture_buffer->channels_const()[0],
+        capture_nonlocked_.capture_processing_format.num_frames()));
+    if (log_rms) {
+      RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
+      RTC_HISTOGRAM_COUNTS_LINEAR(
+          "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
+          RmsLevel::kMinLevelDb, 64);
+      RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
+                                  levels.peak, 1, RmsLevel::kMinLevelDb, 64);
+    }
+
+    if (submodules_.agc_manager) {
+      int level = recommended_stream_analog_level_locked();
+      data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
+                            &level);
+    }
+
+    // Compute echo-detector stats.
+    if (config_.residual_echo_detector.enabled) {
+      RTC_DCHECK(submodules_.echo_detector);
+      auto ed_metrics = submodules_.echo_detector->GetMetrics();
+      capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
+      capture_.stats.residual_echo_likelihood_recent_max =
+          ed_metrics.echo_likelihood_recent_max;
+    }
   }
 
-  if (config_.residual_echo_detector.enabled) {
-    RTC_DCHECK(submodules_.echo_detector);
-    submodules_.echo_detector->AnalyzeCaptureAudio(rtc::ArrayView<const float>(
-        capture_buffer->channels()[0], capture_buffer->num_frames()));
-  }
-
-  // TODO(aluebs): Investigate if the transient suppression placement should be
-  // before or after the AGC.
-  if (submodules_.transient_suppressor) {
-    float voice_probability = submodules_.agc_manager.get()
-                                  ? submodules_.agc_manager->voice_probability()
-                                  : 1.f;
-
-    submodules_.transient_suppressor->Suppress(
-        capture_buffer->channels()[0], capture_buffer->num_frames(),
-        capture_buffer->num_channels(),
-        capture_buffer->split_bands_const(0)[kBand0To8kHz],
-        capture_buffer->num_frames_per_band(),
-        capture_.keyboard_info.keyboard_data,
-        capture_.keyboard_info.num_keyboard_frames, voice_probability,
-        capture_.key_pressed);
-  }
-
-  // Experimental APM sub-module that analyzes |capture_buffer|.
-  if (submodules_.capture_analyzer) {
-    submodules_.capture_analyzer->Analyze(capture_buffer);
-  }
-
-  if (submodules_.gain_controller2) {
-    submodules_.gain_controller2->NotifyAnalogLevel(
-        recommended_stream_analog_level_locked());
-    submodules_.gain_controller2->Process(capture_buffer);
-  }
-
-  if (submodules_.capture_post_processor) {
-    submodules_.capture_post_processor->Process(capture_buffer);
-  }
-
-  // The level estimator operates on the recombined data.
-  if (config_.level_estimation.enabled) {
-    submodules_.output_level_estimator->ProcessStream(*capture_buffer);
-    capture_.stats.output_rms_dbfs = submodules_.output_level_estimator->RMS();
-  } else {
-    capture_.stats.output_rms_dbfs = absl::nullopt;
-  }
-
-  capture_output_rms_.Analyze(rtc::ArrayView<const float>(
-      capture_buffer->channels_const()[0],
-      capture_nonlocked_.capture_processing_format.num_frames()));
-  if (log_rms) {
-    RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
-    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms",
-                                levels.average, 1, RmsLevel::kMinLevelDb, 64);
-    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
-                                levels.peak, 1, RmsLevel::kMinLevelDb, 64);
-  }
-
-  if (submodules_.agc_manager) {
-    int level = recommended_stream_analog_level_locked();
-    data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
-                          &level);
-  }
-
-  // Compute echo-related stats.
+  // Compute echo-controller stats.
   if (submodules_.echo_controller) {
     auto ec_metrics = submodules_.echo_controller->GetMetrics();
     capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
@@ -1314,13 +1332,6 @@
         ec_metrics.echo_return_loss_enhancement;
     capture_.stats.delay_ms = ec_metrics.delay_ms;
   }
-  if (config_.residual_echo_detector.enabled) {
-    RTC_DCHECK(submodules_.echo_detector);
-    auto ed_metrics = submodules_.echo_detector->GetMetrics();
-    capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
-    capture_.stats.residual_echo_likelihood_recent_max =
-        ed_metrics.echo_likelihood_recent_max;
-  }
 
   // Pass stats for reporting.
   stats_reporter_.UpdateStatistics(capture_.stats);

diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 8306ac7..c4bbf11 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h

@@ -419,13 +419,17 @@
   const struct ApmConstants {
     ApmConstants(bool multi_channel_render_support,
                  bool multi_channel_capture_support,
-                 bool enforce_split_band_hpf)
+                 bool enforce_split_band_hpf,
+                 bool minimize_processing_for_unused_output)
         : multi_channel_render_support(multi_channel_render_support),
           multi_channel_capture_support(multi_channel_capture_support),
-          enforce_split_band_hpf(enforce_split_band_hpf) {}
+          enforce_split_band_hpf(enforce_split_band_hpf),
+          minimize_processing_for_unused_output(
+              minimize_processing_for_unused_output) {}
     bool multi_channel_render_support;
     bool multi_channel_capture_support;
     bool enforce_split_band_hpf;
+    bool minimize_processing_for_unused_output;
   } constants_;
 
   struct ApmCaptureState {
commit	aa6adffba325f4b698a1e94aeab020bfdc47adec	[log] [tgz]
author	Per Åhgren <peah@webrtc.org>	Thu Mar 11 08:57:07 2021
committer	Commit Bot <commit-bot@chromium.org>	Thu Mar 11 10:06:58 2021
tree	9087c5b64e513d40f9b189153c2db43dd17e8737
parent	54dbc3be3fdbb77554d47a4bcede94f6cb8ad602 [diff]