Saturation Protector in AGC2.

Another submodule of the Automatic Gain Controller 2. It refines the
biased estimate of the Adaptive Mode Level Estimator. It works by
generating a delayed stream of peak levels. The delayed peaks are
compared to the level estimate.

Bug: webrtc:7494
Change-Id: If4c2c19088d1ca73fb93511dad4e1c8ccabcaf03
Reviewed-on: https://webrtc-review.googlesource.com/65461
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22732}
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index df5ec6cf..ca71b92 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -135,6 +135,7 @@
 
   sources = [
     "adaptive_mode_level_estimator_unittest.cc",
+    "saturation_protector_unittest.cc",
   ]
   deps = [
     ":adaptive_digital",
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
index b190607..6aa2e91 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
@@ -64,5 +64,6 @@
                             last_estimate_with_offset_dbfs_);
   apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs",
                             LatestLevelEstimate());
+  saturation_protector_.DebugDumpEstimate();
 }
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
index dfcaa53..9762f1f 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
@@ -26,7 +26,7 @@
  private:
   void DebugDumpEstimate();
 
-  int buffer_size_ms_ = 0;
+  size_t buffer_size_ms_ = 0;
   float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
   float estimate_numerator_ = 0.f;
   float estimate_denominator_ = 0.f;
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index d4aa3fb..168c66c 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -19,7 +19,7 @@
 
 constexpr float kMinFloatS16Value = -32768.f;
 constexpr float kMaxFloatS16Value = 32767.f;
-constexpr double kMaxAbsFloatS16Value = 32768.0;
+constexpr float kMaxAbsFloatS16Value = 32768.0f;
 
 constexpr size_t kFrameDurationMs = 10;
 constexpr size_t kSubFramesInFrame = 20;
@@ -32,13 +32,27 @@
 constexpr float kVadConfidenceThreshold = 0.9f;
 
 // The amount of 'memory' of the Level Estimator. Decides leak factors.
-constexpr float kFullBufferSizeMs = 1000.f;
+constexpr size_t kFullBufferSizeMs = 1000;
 constexpr float kFullBufferLeakFactor = 1.f - 1.f / kFullBufferSizeMs;
 
 constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;
 
+// Saturation Protector settings.
 constexpr float kInitialSaturationMarginDb = 17.f;
 
+constexpr size_t kPeakEnveloperSuperFrameLengthMs = 500;
+
+constexpr size_t kPeakEnveloperBufferSize =
+    kFullBufferSizeMs / kPeakEnveloperSuperFrameLengthMs + 1;
+
+// This value is 10 ** (-1/20 * frame_size_ms / satproc_attack_ms),
+// where satproc_attack_ms is 1000 and frame_size_ms is 10.
+constexpr float kSaturationProtectorAttackConstant = 0.9988493699365052f;
+
+// This value is 10 ** (-1/20 * frame_size_ms / satproc_decay_ms),
+// where satproc_decay_ms is 5000 and frame_size_ms is 10.
+constexpr float kSaturationProtectorDecayConstant = 0.9997697679981565f;
+
 // This is computed from kDecayMs by
 // 10 ** (-1/20 * subframe_duration / kDecayMs).
 // |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
diff --git a/modules/audio_processing/agc2/saturation_protector.cc b/modules/audio_processing/agc2/saturation_protector.cc
index a6f1a83..216e1b6 100644
--- a/modules/audio_processing/agc2/saturation_protector.cc
+++ b/modules/audio_processing/agc2/saturation_protector.cc
@@ -17,13 +17,74 @@
 
 namespace webrtc {
 
-SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) {}
+namespace {
+void ShiftBuffer(std::array<float, kPeakEnveloperBufferSize>* buffer) {
+  // Shift left by one: the oldest entry is dropped, the last slot is stale.
+  std::copy(buffer->cbegin() + 1, buffer->cend(), buffer->begin());
+}
+}  // namespace
+
+SaturationProtector::PeakEnveloper::PeakEnveloper() = default;
+
+void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) {
+  // Track the running maximum of the superframe being accumulated.
+  current_superframe_peak_dbfs_ =
+      std::max(current_superframe_peak_dbfs_, frame_peak_dbfs);
+  speech_time_in_estimate_ms_ += kFrameDurationMs;
+  if (speech_time_in_estimate_ms_ <= kPeakEnveloperSuperFrameLengthMs) {
+    return;  // The current superframe is not complete yet.
+  }
+  // Superframe complete: store its peak, evicting the oldest when full.
+  if (elements_in_buffer_ == kPeakEnveloperBufferSize) {
+    ShiftBuffer(&peak_delay_buffer_);
+    peak_delay_buffer_.back() = current_superframe_peak_dbfs_;
+  } else {
+    peak_delay_buffer_[elements_in_buffer_++] = current_superframe_peak_dbfs_;
+  }
+  speech_time_in_estimate_ms_ = 0;
+  current_superframe_peak_dbfs_ = -90.f;
+}
+
+float SaturationProtector::PeakEnveloper::Query() const {
+  // Until the first superframe has been buffered there is no delayed peak
+  // yet, so the running peak of the current superframe is reported.
+  if (elements_in_buffer_ == 0) {
+    return current_superframe_peak_dbfs_;
+  }
+  // Otherwise report the oldest buffered peak, i.e. the most delayed one.
+  return peak_delay_buffer_[0];
+}
+
+SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
+    : apm_data_dumper_(apm_data_dumper) {}  // Used by DebugDumpEstimate().
 
 void SaturationProtector::UpdateMargin(
     const VadWithLevel::LevelAndProbability& vad_data,
-    float last_speech_level_estimate) {}
+    float last_speech_level_estimate) {
+  peak_enveloper_.Process(vad_data.speech_peak_dbfs);
+  const float target_margin_db =
+      peak_enveloper_.Query() - last_speech_level_estimate;
+
+  // Exponential smoothing toward the target: the attack constant is used
+  // when the margin has to grow, the decay constant otherwise.
+  const float smoothing = last_margin_ < target_margin_db
+                              ? kSaturationProtectorAttackConstant
+                              : kSaturationProtectorDecayConstant;
+  const float smoothed_margin_db =
+      last_margin_ * smoothing + target_margin_db * (1.f - smoothing);
+
+  last_margin_ = rtc::SafeClamp<float>(smoothed_margin_db, 12.f, 25.f);
+}
 
 float SaturationProtector::LastMargin() const {
-  return kInitialSaturationMarginDb;
+  return last_margin_;  // Clamped to [12, 25] by UpdateMargin().
 }
+
+void SaturationProtector::DebugDumpEstimate() const {
+  const float delayed_peak = peak_enveloper_.Query();  // Delayed peak level.
+  apm_data_dumper_->DumpRaw(
+      "agc2_adaptive_saturation_protector_delayed_peak_dbfs", delayed_peak);
+  apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", last_margin_);
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/saturation_protector.h b/modules/audio_processing/agc2/saturation_protector.h
index dcf5184..d330c15 100644
--- a/modules/audio_processing/agc2/saturation_protector.h
+++ b/modules/audio_processing/agc2/saturation_protector.h
@@ -34,6 +34,29 @@
   // Returns latest computed margin. Used in cases when speech is not
   // detected.
   float LastMargin() const;
+
+  void DebugDumpEstimate() const;
+
+ private:
+  // Computes a delayed envelope of the frame peaks, one value per superframe.
+  class PeakEnveloper {
+   public:
+    PeakEnveloper();
+    void Process(float frame_peak_dbfs);  // Feeds one frame peak.
+    // Returns the oldest buffered peak (the running peak if none is buffered).
+    float Query() const;
+
+   private:
+    size_t speech_time_in_estimate_ms_ = 0;  // Reset after each superframe.
+    float current_superframe_peak_dbfs_ = -90.f;
+    size_t elements_in_buffer_ = 0;  // Number of valid buffer entries.
+    std::array<float, kPeakEnveloperBufferSize> peak_delay_buffer_ = {};
+  };
+
+  ApmDataDumper* apm_data_dumper_;
+
+  float last_margin_ = kInitialSaturationMarginDb;
+  PeakEnveloper peak_enveloper_;
 };
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/saturation_protector_unittest.cc b/modules/audio_processing/agc2/saturation_protector_unittest.cc
new file mode 100644
index 0000000..88da2a2
--- /dev/null
+++ b/modules/audio_processing/agc2/saturation_protector_unittest.cc
@@ -0,0 +1,137 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+float RunOnConstantLevel(int num_iterations,
+                         VadWithLevel::LevelAndProbability vad_data,
+                         float estimated_level_dbfs,
+                         SaturationProtector* saturation_protector) {
+  // Returns the largest per-call change of the margin over all iterations.
+  float largest_step = 0.f;
+  float previous = saturation_protector->LastMargin();
+  for (int remaining = num_iterations; remaining > 0; --remaining) {
+    saturation_protector->UpdateMargin(vad_data, estimated_level_dbfs);
+    const float current = saturation_protector->LastMargin();
+    largest_step = std::max(largest_step, std::abs(current - previous));
+    previous = current;
+  }
+  return largest_step;
+}
+}  // namespace
+
+TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) {
+  ApmDataDumper data_dumper(0);
+  SaturationProtector protector(&data_dumper);
+  // Smoke test: every call below only has to complete without crashing.
+  const VadWithLevel::LevelAndProbability frame(1.f, -20.f, -10.f);
+  protector.UpdateMargin(frame, -20.f);
+  static_cast<void>(protector.LastMargin());
+  protector.DebugDumpEstimate();
+}
+
+// Check that, given enough frames, the margin converges towards the crest
+// factor, i.e. the gap between the peak level and the speech level estimate.
+TEST(AutomaticGainController2SaturationProtector,
+     ProtectorEstimatesCrestRatio) {
+  constexpr float kPeakLevel = -20.f;
+  constexpr float kCrestFactor = kInitialSaturationMarginDb + 1.f;
+  constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
+  // Tolerate half of the distance the margin has to travel.
+  const float kMaxDifference =
+      0.5 * std::abs(kInitialSaturationMarginDb - kCrestFactor);
+
+  ApmDataDumper apm_data_dumper(0);
+  SaturationProtector saturation_protector(&apm_data_dumper);
+  const VadWithLevel::LevelAndProbability vad_data(1.f, -90.f, kPeakLevel);
+  static_cast<void>(
+      RunOnConstantLevel(2000, vad_data, kSpeechLevel, &saturation_protector));
+
+  EXPECT_NEAR(saturation_protector.LastMargin(), kCrestFactor, kMaxDifference);
+}
+
+TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) {
+  ApmDataDumper apm_data_dumper(0);
+  SaturationProtector saturation_protector(&apm_data_dumper);
+
+  constexpr float kPeakLevel = -20.f;
+  constexpr float kCrestFactor = kInitialSaturationMarginDb - 5.f;
+  constexpr float kOtherCrestFactor = kInitialSaturationMarginDb;
+  constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
+  constexpr float kOtherSpeechLevel = kPeakLevel - kOtherCrestFactor;
+
+  constexpr int kNumIterations = 1000;
+  float max_difference = RunOnConstantLevel(
+      kNumIterations, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
+      kSpeechLevel, &saturation_protector);
+  // Same peak, different speech level: keep tracking the largest step.
+  max_difference =
+      std::max(RunOnConstantLevel(
+                   kNumIterations,
+                   VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
+                   kOtherSpeechLevel, &saturation_protector),
+               max_difference);
+
+  constexpr float kMaxChangeSpeedDbPerSecond = 0.5;  // 1 dB / 2 seconds.
+  // The largest per-frame step must not exceed that speed.
+  EXPECT_LE(max_difference,
+            kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs);
+}
+
+TEST(AutomaticGainController2SaturationProtector,
+     ProtectorAdaptsToDelayedChanges) {
+  ApmDataDumper apm_data_dumper(0);
+  SaturationProtector saturation_protector(&apm_data_dumper);
+
+  constexpr int kDelayIterations = kFullBufferSizeMs / kFrameDurationMs;
+  constexpr float kInitialSpeechLevelDbfs = -30;
+  constexpr float kLaterSpeechLevelDbfs = -15;
+
+  // First run with both the peak and the level estimate at the initial level.
+  float max_difference = RunOnConstantLevel(
+      kDelayIterations,
+      VadWithLevel::LevelAndProbability(
+          1.f, -90.f, kInitialSpeechLevelDbfs + kInitialSaturationMarginDb),
+      kInitialSpeechLevelDbfs, &saturation_protector);
+
+  // Then the peak changes, but the level estimate passed in does not.
+  max_difference = std::max(
+      RunOnConstantLevel(
+          kDelayIterations,
+          VadWithLevel::LevelAndProbability(
+              1.f, -90.f, kLaterSpeechLevelDbfs + kInitialSaturationMarginDb),
+          kInitialSpeechLevelDbfs, &saturation_protector),
+      max_difference);
+
+  // Then both the peak and the level estimate are at the later level.
+  max_difference = std::max(
+      RunOnConstantLevel(
+          kDelayIterations,
+          VadWithLevel::LevelAndProbability(
+              1.f, -90.f, kLaterSpeechLevelDbfs + kInitialSaturationMarginDb),
+          kLaterSpeechLevelDbfs, &saturation_protector),
+      max_difference);
+  // The margin must end near its initial value and never jump between calls.
+  const float total_difference =
+      std::abs(saturation_protector.LastMargin() - kInitialSaturationMarginDb);
+
+  EXPECT_LE(total_difference, 0.05f);
+  EXPECT_LE(max_difference, 0.01f);
+}
+
+}  // namespace webrtc