Saturation Protector in AGC2.
Another submodule of the Automatic Gain Controller 2. It refines the
biased estimate of the Adaptive Mode Level Estimator. It works by
generating a delayed stream of peak levels. The delayed peaks are
compared to the level estimate.
Bug: webrtc:7494
Change-Id: If4c2c19088d1ca73fb93511dad4e1c8ccabcaf03
Reviewed-on: https://webrtc-review.googlesource.com/65461
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22732}
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index df5ec6cf..ca71b92 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -135,6 +135,7 @@
sources = [
"adaptive_mode_level_estimator_unittest.cc",
+ "saturation_protector_unittest.cc",
]
deps = [
":adaptive_digital",
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
index b190607..6aa2e91 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
@@ -64,5 +64,6 @@
last_estimate_with_offset_dbfs_);
apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs",
LatestLevelEstimate());
+ saturation_protector_.DebugDumpEstimate();
}
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
index dfcaa53..9762f1f 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
@@ -26,7 +26,7 @@
private:
void DebugDumpEstimate();
- int buffer_size_ms_ = 0;
+ size_t buffer_size_ms_ = 0;
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
float estimate_numerator_ = 0.f;
float estimate_denominator_ = 0.f;
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index d4aa3fb..168c66c 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -19,7 +19,7 @@
constexpr float kMinFloatS16Value = -32768.f;
constexpr float kMaxFloatS16Value = 32767.f;
-constexpr double kMaxAbsFloatS16Value = 32768.0;
+constexpr float kMaxAbsFloatS16Value = 32768.0f;
constexpr size_t kFrameDurationMs = 10;
constexpr size_t kSubFramesInFrame = 20;
@@ -32,13 +32,27 @@
constexpr float kVadConfidenceThreshold = 0.9f;
// The amount of 'memory' of the Level Estimator. Decides leak factors.
-constexpr float kFullBufferSizeMs = 1000.f;
+constexpr size_t kFullBufferSizeMs = 1000;
constexpr float kFullBufferLeakFactor = 1.f - 1.f / kFullBufferSizeMs;
constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;
+// Saturation Protector settings.
constexpr float kInitialSaturationMarginDb = 17.f;
+constexpr size_t kPeakEnveloperSuperFrameLengthMs = 500;
+
+constexpr size_t kPeakEnveloperBufferSize =
+ kFullBufferSizeMs / kPeakEnveloperSuperFrameLengthMs + 1;
+
+// This value is 10 ** (-1/20 * frame_size_ms / satproc_attack_ms),
+// where satproc_attack_ms is 5000.
+constexpr float kSaturationProtectorAttackConstant = 0.9988493699365052f;
+
+// This value is 10 ** (-1/20 * frame_size_ms / satproc_decay_ms),
+// where satproc_decay_ms is 1000.
+constexpr float kSaturationProtectorDecayConstant = 0.9997697679981565f;
+
// This is computed from kDecayMs by
// 10 ** (-1/20 * subframe_duration / kDecayMs).
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
diff --git a/modules/audio_processing/agc2/saturation_protector.cc b/modules/audio_processing/agc2/saturation_protector.cc
index a6f1a83..216e1b6 100644
--- a/modules/audio_processing/agc2/saturation_protector.cc
+++ b/modules/audio_processing/agc2/saturation_protector.cc
@@ -17,13 +17,74 @@
namespace webrtc {
-SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) {}
+namespace {
+void ShiftBuffer(std::array<float, kPeakEnveloperBufferSize>* buffer_) {
+ // Move everything one element back.
+ std::copy(buffer_->begin() + 1, buffer_->end(), buffer_->begin());
+}
+} // namespace
+
+SaturationProtector::PeakEnveloper::PeakEnveloper() = default;
+
+void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) {
+ // Update the delayed buffer and the current superframe peak.
+ current_superframe_peak_dbfs_ =
+ std::max(current_superframe_peak_dbfs_, frame_peak_dbfs);
+ speech_time_in_estimate_ms_ += kFrameDurationMs;
+ if (speech_time_in_estimate_ms_ > kPeakEnveloperSuperFrameLengthMs) {
+ speech_time_in_estimate_ms_ = 0;
+ const bool buffer_full = elements_in_buffer_ == kPeakEnveloperBufferSize;
+ if (buffer_full) {
+ ShiftBuffer(&peak_delay_buffer_);
+ *peak_delay_buffer_.rbegin() = current_superframe_peak_dbfs_;
+ } else {
+ peak_delay_buffer_[elements_in_buffer_] = current_superframe_peak_dbfs_;
+ elements_in_buffer_++;
+ }
+ current_superframe_peak_dbfs_ = -90.f;
+ }
+}
+
+float SaturationProtector::PeakEnveloper::Query() const {
+ float result;
+ if (elements_in_buffer_ > 0) {
+ result = peak_delay_buffer_[0];
+ } else {
+ result = current_superframe_peak_dbfs_;
+ }
+ return result;
+}
+
+SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
+ : apm_data_dumper_(apm_data_dumper) {}
void SaturationProtector::UpdateMargin(
const VadWithLevel::LevelAndProbability& vad_data,
- float last_speech_level_estimate) {}
+ float last_speech_level_estimate) {
+ peak_enveloper_.Process(vad_data.speech_peak_dbfs);
+ const float delayed_peak_dbfs = peak_enveloper_.Query();
+ const float difference_db = delayed_peak_dbfs - last_speech_level_estimate;
+
+ if (last_margin_ < difference_db) {
+ last_margin_ = last_margin_ * kSaturationProtectorAttackConstant +
+ difference_db * (1.f - kSaturationProtectorAttackConstant);
+ } else {
+ last_margin_ = last_margin_ * kSaturationProtectorDecayConstant +
+ difference_db * (1.f - kSaturationProtectorDecayConstant);
+ }
+
+ last_margin_ = rtc::SafeClamp<float>(last_margin_, 12.f, 25.f);
+}
float SaturationProtector::LastMargin() const {
- return kInitialSaturationMarginDb;
+ return last_margin_;
}
+
+void SaturationProtector::DebugDumpEstimate() const {
+ apm_data_dumper_->DumpRaw(
+ "agc2_adaptive_saturation_protector_delayed_peak_dbfs",
+ peak_enveloper_.Query());
+ apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", last_margin_);
+}
+
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/saturation_protector.h b/modules/audio_processing/agc2/saturation_protector.h
index dcf5184..d330c15 100644
--- a/modules/audio_processing/agc2/saturation_protector.h
+++ b/modules/audio_processing/agc2/saturation_protector.h
@@ -34,6 +34,29 @@
// Returns latest computed margin. Used in cases when speech is not
// detected.
float LastMargin() const;
+
+ void DebugDumpEstimate() const;
+
+ private:
+ // Computes a delayed envelope of peaks.
+ class PeakEnveloper {
+ public:
+ PeakEnveloper();
+ void Process(float frame_peak_dbfs);
+
+ float Query() const;
+
+ private:
+ size_t speech_time_in_estimate_ms_ = 0;
+ float current_superframe_peak_dbfs_ = -90.f;
+ size_t elements_in_buffer_ = 0;
+ std::array<float, kPeakEnveloperBufferSize> peak_delay_buffer_ = {};
+ };
+
+ ApmDataDumper* apm_data_dumper_;
+
+ float last_margin_ = kInitialSaturationMarginDb;
+ PeakEnveloper peak_enveloper_;
};
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/saturation_protector_unittest.cc b/modules/audio_processing/agc2/saturation_protector_unittest.cc
new file mode 100644
index 0000000..88da2a2
--- /dev/null
+++ b/modules/audio_processing/agc2/saturation_protector_unittest.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/saturation_protector.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+float RunOnConstantLevel(int num_iterations,
+ VadWithLevel::LevelAndProbability vad_data,
+ float estimated_level_dbfs,
+ SaturationProtector* saturation_protector) {
+ float last_margin = saturation_protector->LastMargin();
+ float max_difference = 0.f;
+ for (int i = 0; i < num_iterations; ++i) {
+ saturation_protector->UpdateMargin(vad_data, estimated_level_dbfs);
+ const float new_margin = saturation_protector->LastMargin();
+ max_difference =
+ std::max(max_difference, std::abs(new_margin - last_margin));
+ last_margin = new_margin;
+ }
+ return max_difference;
+}
+} // namespace
+
+TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) {
+ ApmDataDumper apm_data_dumper(0);
+ SaturationProtector saturation_protector(&apm_data_dumper);
+ VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f);
+
+ saturation_protector.UpdateMargin(vad_data, -20.f);
+ static_cast<void>(saturation_protector.LastMargin());
+ saturation_protector.DebugDumpEstimate();
+}
+
+// Check that the estimate converges to the ratio between peaks and
+// level estimator values after a while.
+TEST(AutomaticGainController2SaturationProtector,
+ ProtectorEstimatesCrestRatio) {
+ ApmDataDumper apm_data_dumper(0);
+ SaturationProtector saturation_protector(&apm_data_dumper);
+
+ constexpr float kPeakLevel = -20.f;
+ constexpr float kCrestFactor = kInitialSaturationMarginDb + 1.f;
+ constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
+ const float kMaxDifference =
+ 0.5 * std::abs(kInitialSaturationMarginDb - kCrestFactor);
+
+ static_cast<void>(RunOnConstantLevel(
+ 2000, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
+ kSpeechLevel, &saturation_protector));
+
+ EXPECT_NEAR(saturation_protector.LastMargin(), kCrestFactor, kMaxDifference);
+}
+
+TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) {
+ ApmDataDumper apm_data_dumper(0);
+ SaturationProtector saturation_protector(&apm_data_dumper);
+
+ constexpr float kPeakLevel = -20.f;
+ constexpr float kCrestFactor = kInitialSaturationMarginDb - 5.f;
+ constexpr float kOtherCrestFactor = kInitialSaturationMarginDb;
+ constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
+ constexpr float kOtherSpeechLevel = kPeakLevel - kOtherCrestFactor;
+
+ constexpr int kNumIterations = 1000;
+ float max_difference = RunOnConstantLevel(
+ kNumIterations, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
+ kSpeechLevel, &saturation_protector);
+
+ max_difference =
+ std::max(RunOnConstantLevel(
+ kNumIterations,
+ VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
+ kOtherSpeechLevel, &saturation_protector),
+ max_difference);
+
+ constexpr float kMaxChangeSpeedDbPerSecond = 0.5; // 1 db / 2 seconds.
+
+ EXPECT_LE(max_difference,
+ kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs);
+}
+
+TEST(AutomaticGainController2SaturationProtector,
+ ProtectorAdaptsToDelayedChanges) {
+ ApmDataDumper apm_data_dumper(0);
+ SaturationProtector saturation_protector(&apm_data_dumper);
+
+ constexpr int kDelayIterations = kFullBufferSizeMs / kFrameDurationMs;
+ constexpr float kInitialSpeechLevelDbfs = -30;
+ constexpr float kLaterSpeechLevelDbfs = -15;
+
+ // First run on initial level.
+ float max_difference = RunOnConstantLevel(
+ kDelayIterations,
+ VadWithLevel::LevelAndProbability(
+ 1.f, -90.f, kInitialSpeechLevelDbfs + kInitialSaturationMarginDb),
+ kInitialSpeechLevelDbfs, &saturation_protector);
+
+ // Then peak changes, but not RMS.
+ max_difference = std::max(
+ RunOnConstantLevel(
+ kDelayIterations,
+ VadWithLevel::LevelAndProbability(
+ 1.f, -90.f, kLaterSpeechLevelDbfs + kInitialSaturationMarginDb),
+ kInitialSpeechLevelDbfs, &saturation_protector),
+ max_difference);
+
+ // Then both change.
+ max_difference = std::max(
+ RunOnConstantLevel(
+ kDelayIterations,
+ VadWithLevel::LevelAndProbability(
+ 1.f, -90.f, kLaterSpeechLevelDbfs + kInitialSaturationMarginDb),
+ kLaterSpeechLevelDbfs, &saturation_protector),
+ max_difference);
+
+ const float total_difference =
+ std::abs(saturation_protector.LastMargin() - kInitialSaturationMarginDb);
+
+ EXPECT_LE(total_difference, 0.05f);
+ EXPECT_LE(max_difference, 0.01f);
+}
+
+} // namespace webrtc