Level estimation and saturation protection stub. The level estimator (AdaptiveModeLevelEstimator) produces a biased estimate of the speech level. In our model, we use another module (the SaturationProtector) to compute the bias. This CL contains the estimator and a stub of the saturation protector. Bug: webrtc:7494 Change-Id: I0df736d0346063f544fa680b4cc84177ea548545 Reviewed-on: https://webrtc-review.googlesource.com/64820 Commit-Queue: Alex Loiko <aleloi@webrtc.org> Reviewed-by: Ivo Creusen <ivoc@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22641}

commit: 1e48e8095c8bb4daf6554233e8bc3d36563c817a [log] [tgz]
author: Alex Loiko <aleloi@webrtc.org> Wed Mar 28 07:45:29 2018
committer: Commit Bot <commit-bot@chromium.org> Wed Mar 28 08:41:45 2018
tree: 3d963c226615952ecc8f5a7c8d8d52576fa0a1d6
parent: e24c41ea45fef7a49a24c5d905957aabcd3ba028 [diff]
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index c8992b0..b36e76b 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn

@@ -548,6 +548,7 @@
       "../../test:test_support",
       "../audio_coding:neteq_input_audio_tools",
       "aec_dump:mock_aec_dump_unittests",
+      "agc2:adaptive_digital_unittests",
       "agc2:fixed_digital_unittests",
       "test/conversational_speech:unittest",
       "vad:vad_unittests",

diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index 61d17e7..df5ec6cf 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn

@@ -25,6 +25,8 @@
     "adaptive_mode_level_estimator.h",
     "noise_level_estimator.cc",
     "noise_level_estimator.h",
+    "saturation_protector.cc",
+    "saturation_protector.h",
   ]
 
   configs += [ "..:apm_debug_dump" ]
@@ -126,3 +128,25 @@
     "../../../rtc_base:rtc_base_tests_utils",
   ]
 }
+
+rtc_source_set("adaptive_digital_unittests") {
+  testonly = true
+  configs += [ "..:apm_debug_dump" ]
+
+  sources = [
+    "adaptive_mode_level_estimator_unittest.cc",
+  ]
+  deps = [
+    ":adaptive_digital",
+    ":common",
+    ":test_utils",
+    "..:apm_logging",
+    "..:audio_frame_view",
+    "../../../api:array_view",
+    "../../../common_audio",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:rtc_base_approved",
+    "../../../rtc_base:rtc_base_tests_utils",
+    "../vad:vad_with_level",
+  ]
+}

diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
index e293bab..b190607 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc

@@ -17,7 +17,9 @@
 namespace webrtc {
 
 AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
-    ApmDataDumper* apm_data_dumper) {}
+    ApmDataDumper* apm_data_dumper)
+    : saturation_protector_(apm_data_dumper),
+      apm_data_dumper_(apm_data_dumper) {}
 
 void AdaptiveModeLevelEstimator::UpdateEstimation(
     const VadWithLevel::LevelAndProbability& vad_data) {
@@ -27,10 +29,40 @@
   RTC_DCHECK_LT(vad_data.speech_peak_dbfs, 50.f);
   RTC_DCHECK_GE(vad_data.speech_probability, 0.f);
   RTC_DCHECK_LE(vad_data.speech_probability, 1.f);
+
+  if (vad_data.speech_probability < kVadConfidenceThreshold) {
+    DebugDumpEstimate();
+    return;
+  }
+
+  const bool buffer_is_full = buffer_size_ms_ >= kFullBufferSizeMs;
+  if (!buffer_is_full) {
+    buffer_size_ms_ += kFrameDurationMs;
+  }
+
+  const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
+
+  estimate_numerator_ = estimate_numerator_ * leak_factor +
+                        vad_data.speech_rms_dbfs * vad_data.speech_probability;
+  estimate_denominator_ =
+      estimate_denominator_ * leak_factor + vad_data.speech_probability;
+
+  last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
+
+  saturation_protector_.UpdateMargin(vad_data, last_estimate_with_offset_dbfs_);
+  DebugDumpEstimate();
 }
 
 float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
-  // TODO(webrtc:7494): This is a stub. Add implementation.
-  return 0.f;
+  return rtc::SafeClamp<float>(
+      last_estimate_with_offset_dbfs_ + saturation_protector_.LastMargin(),
+      -90.f, 0.f);
+}
+
+void AdaptiveModeLevelEstimator::DebugDumpEstimate() {
+  apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_with_offset_dbfs",
+                            last_estimate_with_offset_dbfs_);
+  apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs",
+                            LatestLevelEstimate());
 }
 }  // namespace webrtc

diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
index b8dcf1a..dfcaa53 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h

@@ -11,6 +11,7 @@
 #ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
 #define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
 
+#include "modules/audio_processing/agc2/saturation_protector.h"
 #include "modules/audio_processing/vad/vad_with_level.h"
 
 namespace webrtc {
@@ -21,6 +22,16 @@
   explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
   void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
   float LatestLevelEstimate() const;
+
+ private:
+  void DebugDumpEstimate();
+
+  int buffer_size_ms_ = 0;
+  float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
+  float estimate_numerator_ = 0.f;
+  float estimate_denominator_ = 0.f;
+  SaturationProtector saturation_protector_;
+  ApmDataDumper* const apm_data_dumper_;
 };
 
 }  // namespace webrtc

diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
new file mode 100644
index 0000000..71909d0
--- /dev/null
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc

@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+void RunOnConstantLevel(int num_iterations,
+                        VadWithLevel::LevelAndProbability vad_data,
+                        AdaptiveModeLevelEstimator* level_estimator) {
+  for (int i = 0; i < num_iterations; ++i) {
+    level_estimator->UpdateEstimation(vad_data);  // By copy
+  }
+}
+}  // namespace
+
+TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
+     EstimatorShouldNotCrash) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
+
+  VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f);
+  level_estimator.UpdateEstimation(vad_data);
+  static_cast<void>(level_estimator.LatestLevelEstimate());
+}
+
+TEST(AutomaticGainController2AdaptiveModeLevelEstimator, LevelShouldStabilize) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
+
+  constexpr float kSpeechRmsDbfs = -15.f;
+  RunOnConstantLevel(
+      100,
+      VadWithLevel::LevelAndProbability(
+          1.f, kSpeechRmsDbfs - kInitialSaturationMarginDb, kSpeechRmsDbfs),
+      &level_estimator);
+
+  EXPECT_NEAR(level_estimator.LatestLevelEstimate(), kSpeechRmsDbfs, 0.1f);
+}
+
+TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
+     EstimatorIgnoresZeroProbabilityFrames) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
+
+  // Run for one second of fake audio.
+  constexpr float kSpeechRmsDbfs = -25.f;
+  RunOnConstantLevel(
+      100,
+      VadWithLevel::LevelAndProbability(
+          1.f, kSpeechRmsDbfs - kInitialSaturationMarginDb, kSpeechRmsDbfs),
+      &level_estimator);
+
+  // Run for one more second, but mark as not speech.
+  constexpr float kNoiseRmsDbfs = 0.f;
+  RunOnConstantLevel(
+      100, VadWithLevel::LevelAndProbability(0.f, kNoiseRmsDbfs, kNoiseRmsDbfs),
+      &level_estimator);
+
+  // Level should not have changed.
+  EXPECT_NEAR(level_estimator.LatestLevelEstimate(), kSpeechRmsDbfs, 0.1f);
+}
+
+TEST(AutomaticGainController2AdaptiveModeLevelEstimator, TimeToAdapt) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
+
+  // Run for one 'window size' interval
+  constexpr float kInitialSpeechRmsDbfs = -30.f;
+  RunOnConstantLevel(
+      kFullBufferSizeMs / kFrameDurationMs,
+      VadWithLevel::LevelAndProbability(
+          1.f, kInitialSpeechRmsDbfs - kInitialSaturationMarginDb,
+          kInitialSpeechRmsDbfs),
+      &level_estimator);
+
+  // Run for one half 'window size' interval. This should not be enough to
+  // adapt.
+  constexpr float kDifferentSpeechRmsDbfs = -10.f;
+  // The estimate should end up within 25% of the level change once adapted.
+  const float kMaxDifferenceDb =
+      0.25 * std::abs(kDifferentSpeechRmsDbfs - kInitialSpeechRmsDbfs);
+  RunOnConstantLevel(
+      static_cast<int>(kFullBufferSizeMs / kFrameDurationMs / 2),
+      VadWithLevel::LevelAndProbability(
+          1.f, kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
+          kDifferentSpeechRmsDbfs),
+      &level_estimator);
+  EXPECT_GT(
+      std::abs(kDifferentSpeechRmsDbfs - level_estimator.LatestLevelEstimate()),
+      kMaxDifferenceDb);
+
+  // Run for some more time. Afterwards, we should have adapted.
+  RunOnConstantLevel(
+      static_cast<int>(3 * kFullBufferSizeMs / kFrameDurationMs),
+      VadWithLevel::LevelAndProbability(
+          1.f, kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
+          kDifferentSpeechRmsDbfs),
+      &level_estimator);
+  EXPECT_NEAR(level_estimator.LatestLevelEstimate(), kDifferentSpeechRmsDbfs,
+              kMaxDifferenceDb);
+}
+
+}  // namespace webrtc

diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index ad0ab4e..d4aa3fb 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h

@@ -27,6 +27,18 @@
 
 constexpr float kAttackFilterConstant = 0.f;
 
+// Used in the Level Estimator for deciding when to update the speech
+// level estimate.
+constexpr float kVadConfidenceThreshold = 0.9f;
+
+// The amount of 'memory' of the Level Estimator. Decides leak factors.
+constexpr float kFullBufferSizeMs = 1000.f;
+constexpr float kFullBufferLeakFactor = 1.f - 1.f / kFullBufferSizeMs;
+
+constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;
+
+constexpr float kInitialSaturationMarginDb = 17.f;
+
 // This is computed from kDecayMs by
 // 10 ** (-1/20 * subframe_duration / kDecayMs).
 // |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.

diff --git a/modules/audio_processing/agc2/noise_level_estimator.h b/modules/audio_processing/agc2/noise_level_estimator.h
index f22bfd8..f9e4abc 100644
--- a/modules/audio_processing/agc2/noise_level_estimator.h
+++ b/modules/audio_processing/agc2/noise_level_estimator.h

@@ -20,7 +20,7 @@
  public:
   NoiseLevelEstimator() {}
 
-  // Returns the estimated noise level in DbFS.
+  // Returns the estimated noise level in dBFS.
   float Analyze(AudioFrameView<const float> frame);
 
  private:

diff --git a/modules/audio_processing/agc2/saturation_protector.cc b/modules/audio_processing/agc2/saturation_protector.cc
new file mode 100644
index 0000000..a6f1a83
--- /dev/null
+++ b/modules/audio_processing/agc2/saturation_protector.cc

@@ -0,0 +1,29 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) {}
+
+void SaturationProtector::UpdateMargin(
+    const VadWithLevel::LevelAndProbability& vad_data,
+    float last_speech_level_estimate) {}
+
+float SaturationProtector::LastMargin() const {
+  return kInitialSaturationMarginDb;
+}
+}  // namespace webrtc

diff --git a/modules/audio_processing/agc2/saturation_protector.h b/modules/audio_processing/agc2/saturation_protector.h
new file mode 100644
index 0000000..dcf5184
--- /dev/null
+++ b/modules/audio_processing/agc2/saturation_protector.h

@@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
+
+#include <array>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/vad/vad_with_level.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+class SaturationProtector {
+ public:
+  explicit SaturationProtector(ApmDataDumper* apm_data_dumper);
+
+  // Updates the margin estimate. This method should be called
+  // whenever a frame is reliably classified as 'speech'.
+  //
+  // The margin is in dB and can be read back via LastMargin().
+  void UpdateMargin(const VadWithLevel::LevelAndProbability& vad_data,
+                    float last_speech_level_estimate_dbfs);
+
+  // Returns latest computed margin. Used in cases when speech is not
+  // detected.
+  float LastMargin() const;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
commit	1e48e8095c8bb4daf6554233e8bc3d36563c817a	[log] [tgz]
author	Alex Loiko <aleloi@webrtc.org>	Wed Mar 28 07:45:29 2018
committer	Commit Bot <commit-bot@chromium.org>	Wed Mar 28 08:41:45 2018
tree	3d963c226615952ecc8f5a7c8d8d52576fa0a1d6
parent	e24c41ea45fef7a49a24c5d905957aabcd3ba028 [diff]