AGC2-fixed-digital: Level Estimator
This CL adds the Level Estimator of the new gain controller. The Level
Estimator divides a 10 ms input frame into kSubFramesInFrame=20 sub
frames. We take the maximal absolute sample value (across all channels)
in every sub frame. We then apply attack/decay smoothing. The result is
the final level estimate.
The results will be used with InterpolatedGainCurve (see this CL
https://webrtc-review.googlesource.com/c/src/+/51920). For every level
estimate value, we look up a gain with
InterpolatedGainCurve::LookUpGainToApply. This gain is then applied to
the signal.
Bug: webrtc:7949
Change-Id: I2b4b3894a3e945d3dd916ce516c79abacb2b18b1
Reviewed-on: https://webrtc-review.googlesource.com/52381
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22054}
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index 60605e9..a724ba4 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -549,6 +549,7 @@
"agc/loudness_histogram_unittest.cc",
"agc/mock_agc.h",
"audio_buffer_unittest.cc",
+ "audio_frame_view_unittest.cc",
"beamformer/array_util_unittest.cc",
"beamformer/complex_matrix_unittest.cc",
"beamformer/covariance_matrix_generator_unittest.cc",
@@ -557,6 +558,7 @@
"beamformer/mock_nonlinear_beamformer.h",
"config_unittest.cc",
"echo_cancellation_impl_unittest.cc",
+ "gain_controller2_unittest.cc",
"splitting_filter_unittest.cc",
"test/fake_recording_device_unittest.cc",
"transient/dyadic_decimator_unittest.cc",
@@ -576,6 +578,7 @@
":aec_core",
":analog_mic_simulation",
":apm_logging",
+ ":audio_frame_view",
":audio_processing",
":audioproc_test_utils",
":mocks",
@@ -677,7 +680,6 @@
"echo_detector/moving_max_unittest.cc",
"echo_detector/normalized_covariance_estimator_unittest.cc",
"gain_control_unittest.cc",
- "gain_controller2_unittest.cc",
"level_controller/level_controller_unittest.cc",
"level_estimator_unittest.cc",
"low_cut_filter_unittest.cc",
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index 8702e0f..e589010 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -11,6 +11,8 @@
rtc_source_set("agc2") {
sources = [
"agc2_common.h",
+ "fixed_digital_level_estimator.cc",
+ "fixed_digital_level_estimator.h",
"fixed_gain_controller.cc",
"fixed_gain_controller.h",
]
@@ -32,6 +34,8 @@
configs += [ "..:apm_debug_dump" ]
sources = [
+ "agc2_testing_common.h",
+ "fixed_digital_level_estimator_unittest.cc",
"fixed_gain_controller_unittest.cc",
"vector_float_frame.cc",
"vector_float_frame.h",
@@ -41,6 +45,7 @@
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
+ "../../../common_audio",
"../../../rtc_base:rtc_base_tests_utils",
]
}
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index c668d0a..af13dd0 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -11,11 +11,26 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
+#include "rtc_base/basictypes.h"
+
namespace webrtc {
constexpr float kMinSampleValue = -32768.f;
constexpr float kMaxSampleValue = 32767.f;
+constexpr size_t kFrameDurationMs = 10;  // Length of one input frame, in ms.
+constexpr size_t kSubFramesInFrame = 20;  // Level estimates produced per frame.
+
+constexpr float kAttackFilterConstant = 0.f;  // Weight of the old level on a rise; 0 = follow the envelope at once.
+
+constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;  // NOTE(review): presumably one 10 ms frame at 48 kHz; unused in the visible part of this change - confirm.
+
+// This is computed from kDecayMs by
+// 10 ** (-1/20 * subframe_duration / kDecayMs).
+// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
+// kDecayMs is defined in agc2_testing_common.h
+constexpr float kDecayFilterConstant = 0.9998848773724686f;
+
// TODO(aleloi): add the other constants as more AGC2 components are
// added.
} // namespace webrtc
diff --git a/modules/audio_processing/agc2/agc2_testing_common.h b/modules/audio_processing/agc2/agc2_testing_common.h
new file mode 100644
index 0000000..7e27a24
--- /dev/null
+++ b/modules/audio_processing/agc2/agc2_testing_common.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
+
+namespace webrtc {
+
+// Level Estimator test params.
+constexpr float kDecayMs = 500.f;  // Time for the estimate to fall by 1 dB; kDecayFilterConstant in agc2_common.h is derived from it.
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
diff --git a/modules/audio_processing/agc2/fixed_digital_level_estimator.cc b/modules/audio_processing/agc2/fixed_digital_level_estimator.cc
new file mode 100644
index 0000000..9a1fd28
--- /dev/null
+++ b/modules/audio_processing/agc2/fixed_digital_level_estimator.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+FixedDigitalLevelEstimator::FixedDigitalLevelEstimator(  // Stores the dumper, derives the frame sizes, dumps the rate.
+ size_t sample_rate_hz,
+ ApmDataDumper* apm_data_dumper)
+ : apm_data_dumper_(apm_data_dumper) {
+ SetSampleRate(sample_rate_hz);  // Sets samples_in_frame_ and samples_in_sub_frame_.
+ CheckParameterCombination();  // NOTE(review): redundant; SetSampleRate() already ends with this check.
+ RTC_DCHECK(apm_data_dumper_);  // The dumper is dereferenced on the next line.
+ apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate", sample_rate_hz);
+}
+
+void FixedDigitalLevelEstimator::CheckParameterCombination() {  // RTC_DCHECK sanity checks on the derived frame sizes.
+ RTC_DCHECK_GT(samples_in_frame_, 0);
+ RTC_DCHECK_LE(kSubFramesInFrame, samples_in_frame_);  // At least one sample per sub frame.
+ RTC_DCHECK_EQ(samples_in_frame_ % kSubFramesInFrame, 0);  // The frame must split evenly into sub frames.
+ RTC_DCHECK_GT(samples_in_sub_frame_, 1);  // Every sub frame must hold at least two samples.
+}
+
+std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(  // Returns one smoothed peak level per sub frame.
+ const AudioFrameView<const float>& float_frame) {
+ RTC_DCHECK_GT(float_frame.num_channels(), 0);
+ RTC_DCHECK_EQ(float_frame.samples_per_channel(), samples_in_frame_);  // Frame length must match the configured rate.
+
+ // Compute the per-sub-frame peak of |sample| over all channels (no smoothing yet).
+ std::array<float, kSubFramesInFrame> envelope{};  // Value-initialized, so every peak starts at 0.
+ for (size_t channel_idx = 0; channel_idx < float_frame.num_channels();
+ ++channel_idx) {
+ const auto channel = float_frame.channel(channel_idx);
+ for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
+ for (size_t sample_in_sub_frame = 0;
+ sample_in_sub_frame < samples_in_sub_frame_; ++sample_in_sub_frame) {
+ envelope[sub_frame] =
+ std::max(envelope[sub_frame],
+ std::abs(channel[sub_frame * samples_in_sub_frame_ +
+ sample_in_sub_frame]));
+ }
+ }
+ }
+
+ // Make sure envelope increases happen one step earlier so that the
+ // corresponding *gain decrease* doesn't miss a sudden signal
+ // increase due to interpolation.
+ for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame - 1; ++sub_frame) {  // The last sub frame has no successor in this frame.
+ if (envelope[sub_frame] < envelope[sub_frame + 1]) {
+ envelope[sub_frame] = envelope[sub_frame + 1];
+ }
+ }
+
+ // Add attack / decay smoothing.
+ for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
+ const float envelope_value = envelope[sub_frame];
+ if (envelope_value > filter_state_level_) {  // Rising level: attack.
+ envelope[sub_frame] = envelope_value * (1 - kAttackFilterConstant) +
+ filter_state_level_ * kAttackFilterConstant;
+ } else {  // Falling or equal level: decay.
+ envelope[sub_frame] = envelope_value * (1 - kDecayFilterConstant) +
+ filter_state_level_ * kDecayFilterConstant;
+ }
+ filter_state_level_ = envelope[sub_frame];  // Carried into the next sub frame and the next call.
+
+ // Dump data for debug.
+ RTC_DCHECK(apm_data_dumper_);  // NOTE(review): loop-invariant, as is the channel lookup below; both could be hoisted.
+ const auto channel = float_frame.channel(0);  // Only channel 0 samples are dumped.
+ apm_data_dumper_->DumpRaw("agc2_level_estimator_samples",
+ samples_in_sub_frame_,
+ &channel[sub_frame * samples_in_sub_frame_]);
+ apm_data_dumper_->DumpRaw("agc2_level_estimator_level",
+ envelope[sub_frame]);
+ }
+
+ return envelope;
+}
+
+void FixedDigitalLevelEstimator::SetSampleRate(size_t sample_rate_hz) {  // Recomputes the frame sizes; filter_state_level_ is not reset here.
+ samples_in_frame_ = rtc::CheckedDivExact(sample_rate_hz * kFrameDurationMs,  // Samples in one kFrameDurationMs frame.
+ static_cast<size_t>(1000));
+ samples_in_sub_frame_ =
+ rtc::CheckedDivExact(samples_in_frame_, kSubFramesInFrame);  // NOTE(review): presumably fails a check on a non-zero remainder - confirm in rtc_base.
+ CheckParameterCombination();
+}
+} // namespace webrtc
diff --git a/modules/audio_processing/agc2/fixed_digital_level_estimator.h b/modules/audio_processing/agc2/fixed_digital_level_estimator.h
new file mode 100644
index 0000000..7266c15
--- /dev/null
+++ b/modules/audio_processing/agc2/fixed_digital_level_estimator.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
+
+#include <array>
+#include <vector>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+// Produces a smooth signal level estimate from an input audio
+// stream. The estimate smoothing is done through exponential
+// filtering.
+class FixedDigitalLevelEstimator {
+ public:
+ FixedDigitalLevelEstimator(size_t sample_rate_hz,  // |apm_data_dumper| is kept as a raw pointer and RTC_DCHECKed non-null.
+ ApmDataDumper* apm_data_dumper);
+
+ // The input is assumed to be in FloatS16 format. Scaled input will
+ // produce similarly scaled output. A frame of
+ // length kFrameDurationMs=10 ms produces kSubFramesInFrame=20 level
+ // estimates in the same scale.
+ std::array<float, kSubFramesInFrame> ComputeLevel(
+ const AudioFrameView<const float>& float_frame);
+
+ // Rate may be changed at any time (but not concurrently) from the
+ // value passed to the constructor. The class is not thread safe.
+ void SetSampleRate(size_t sample_rate_hz);
+
+ private:
+ void CheckParameterCombination();  // RTC_DCHECKs the sizes derived by SetSampleRate().
+
+ ApmDataDumper* const apm_data_dumper_;  // Sink for the debug dumps.
+ float filter_state_level_ = 0.f;  // Last smoothed level; persists across ComputeLevel() calls.
+ size_t samples_in_frame_;  // Assigned in SetSampleRate().
+ size_t samples_in_sub_frame_;  // samples_in_frame_ / kSubFramesInFrame; assigned in SetSampleRate().
+
+ RTC_DISALLOW_COPY_AND_ASSIGN(FixedDigitalLevelEstimator);
+};
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
diff --git a/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc b/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc
new file mode 100644
index 0000000..d530ecc
--- /dev/null
+++ b/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
+
+#include <limits>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/agc2_testing_common.h"
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kInputLevel = 10000.f;
+
+// Run audio at specified settings through the level estimator, and
+// verify that the output level falls within the bounds.
+void TestLevelEstimator(int sample_rate_hz,
+ int num_channels,
+ float input_level_linear_scale,
+ float expected_min,
+ float expected_max) {
+ ApmDataDumper apm_data_dumper(0);
+ FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
+
+ const VectorFloatFrame vectors_with_float_frame(  // One 10 ms frame; presumably filled with the given level - confirm in vector_float_frame.cc.
+ num_channels, rtc::CheckedDivExact(sample_rate_hz, 100),
+ input_level_linear_scale);
+
+ for (int i = 0; i < 500; ++i) {  // Feed the same frame 500 times.
+ const auto level = level_estimator.ComputeLevel(
+ vectors_with_float_frame.float_frame_view());
+
+ // Give the estimator some time to ramp up.
+ if (i < 50) {  // The first 50 frames are not checked.
+ continue;
+ }
+
+ for (const auto& x : level) {  // Every sub-frame estimate must lie in [expected_min, expected_max].
+ EXPECT_LE(expected_min, x);
+ EXPECT_LE(x, expected_max);
+ }
+ }
+}
+
+// Returns time it takes for the level estimator to decrease its level
+// estimate by 'level_reduction_db'.
+float TimeMsToDecreaseLevel(int sample_rate_hz,
+ int num_channels,
+ float input_level_db,
+ float level_reduction_db) {
+ const float input_level = DbfsToFloatS16(input_level_db);
+ RTC_DCHECK_GT(level_reduction_db, 0);
+
+ const VectorFloatFrame vectors_with_float_frame(
+ num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), input_level);
+
+ ApmDataDumper apm_data_dumper(0);
+ FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
+
+ // Give the LevelEstimator plenty of time to ramp up and stabilize.
+ float last_level = 0.f;
+ for (int i = 0; i < 500; ++i) {
+ const auto level_envelope = level_estimator.ComputeLevel(
+ vectors_with_float_frame.float_frame_view());
+ last_level = *level_envelope.rbegin();  // Keep the newest (last) sub-frame level.
+ }
+
+ // Set input to 0.
+ VectorFloatFrame vectors_with_zero_float_frame(  // Non-const, so the mutable view is returned and converted implicitly below.
+ num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), 0);
+
+ const float reduced_level_linear =
+ DbfsToFloatS16(input_level_db - level_reduction_db);  // Target level to decay down to.
+ int sub_frames_until_level_reduction = 0;
+ while (last_level > reduced_level_linear) {
+ const auto level_envelope = level_estimator.ComputeLevel(
+ vectors_with_zero_float_frame.float_frame_view());
+ for (const auto& v : level_envelope) {
+ EXPECT_LT(v, last_level);  // The level must drop strictly at every sub frame.
+ sub_frames_until_level_reduction++;
+ last_level = v;
+ if (last_level <= reduced_level_linear) {  // Stop counting once the target is reached.
+ break;
+ }
+ }
+ }
+ return static_cast<float>(sub_frames_until_level_reduction) *  // Convert the sub-frame count to milliseconds.
+ kFrameDurationMs / kSubFramesInFrame;
+}
+} // namespace
+
+TEST(AutomaticGainController2LevelEstimator, EstimatorShouldNotCrash) {  // Zero input level and float-wide bounds: only checks that the estimator runs.
+ TestLevelEstimator(8000, 1, 0, std::numeric_limits<float>::lowest(),
+ std::numeric_limits<float>::max());
+}
+
+TEST(AutomaticGainController2LevelEstimator,
+ EstimatorShouldEstimateConstantLevel) {  // Mono input: the estimate must stay within +/-1% of kInputLevel.
+ TestLevelEstimator(10000, 1, kInputLevel, kInputLevel * 0.99,
+ kInputLevel * 1.01);
+}
+
+TEST(AutomaticGainController2LevelEstimator,
+ EstimatorShouldEstimateConstantLevelForManyChannels) {  // Same +/-1% bound, with 10 channels.
+ constexpr size_t num_channels = 10;
+ TestLevelEstimator(20000, num_channels, kInputLevel, kInputLevel * 0.99,
+ kInputLevel * 1.01);
+}
+
+TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForLowLevel) {
+ constexpr float kLevelReductionDb = 25;
+ constexpr float kInitialLowLevel = -40;  // Passed as input_level_db.
+ constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;  // kDecayMs per dB of decay, in ms.
+
+ const float time_to_decrease =
+ TimeMsToDecreaseLevel(22000, 1, kInitialLowLevel, kLevelReductionDb);
+
+ EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);  // Accept +/-10% around the expected time.
+ EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForFullScaleLevel) {
+ constexpr float kLevelReductionDb = 25;
+ constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;  // kDecayMs per dB of decay, in ms.
+
+ const float time_to_decrease =
+ TimeMsToDecreaseLevel(26000, 1, 0, kLevelReductionDb);  // Third argument is input_level_db = 0.
+
+ EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);  // Accept +/-10% around the expected time.
+ EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+TEST(AutomaticGainController2LevelEstimator,
+ TimeToDecreaseForMultipleChannels) {
+ constexpr float kLevelReductionDb = 25;
+ constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;  // kDecayMs per dB of decay, in ms.
+ constexpr size_t kNumChannels = 10;
+
+ const float time_to_decrease =
+ TimeMsToDecreaseLevel(28000, kNumChannels, 0, kLevelReductionDb);  // input_level_db = 0, 10 channels.
+
+ EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);  // Accept +/-10% around the expected time.
+ EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
+}
+
+} // namespace webrtc
diff --git a/modules/audio_processing/agc2/vector_float_frame.h b/modules/audio_processing/agc2/vector_float_frame.h
index 0e86089..b521f34 100644
--- a/modules/audio_processing/agc2/vector_float_frame.h
+++ b/modules/audio_processing/agc2/vector_float_frame.h
@@ -25,6 +25,9 @@
int samples_per_channel,
float start_value);
const AudioFrameView<float>& float_frame_view() { return float_frame_view_; }
+ AudioFrameView<const float> float_frame_view() const {  // Const overload: returns a read-only view by value.
+ return float_frame_view_;  // Converted from the mutable view member.
+ }
~VectorFloatFrame();
diff --git a/modules/audio_processing/audio_frame_view_unittest.cc b/modules/audio_processing/audio_frame_view_unittest.cc
new file mode 100644
index 0000000..5bd51ce
--- /dev/null
+++ b/modules/audio_processing/audio_frame_view_unittest.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2018 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+#include "modules/audio_processing/audio_buffer.h"
+#include "test/gtest.h"
+
+TEST(AudioFrameTest, ConstructFromAudioBuffer) {  // Checks that views built on an AudioBuffer write through to it.
+ constexpr int kSampleRateHz = 48000;
+ constexpr int kNumChannels = 2;
+ constexpr float kFloatConstant = 1272.f;
+ constexpr float kIntConstant = 17252;  // NOTE(review): declared float although it is written to an int16_t sample.
+ const webrtc::StreamConfig stream_config(kSampleRateHz, kNumChannels, false);
+ webrtc::AudioBuffer buffer(
+ stream_config.num_frames(), stream_config.num_channels(),
+ stream_config.num_frames(), stream_config.num_channels(),
+ stream_config.num_frames());
+
+ AudioFrameView<float> non_const_view(  // NOTE(review): unqualified while StreamConfig/AudioBuffer use webrtc:: - confirm AudioFrameView's namespace.
+ buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
+ // Modification is allowed.
+ non_const_view.channel(0)[0] = kFloatConstant;
+ EXPECT_EQ(buffer.channels_f()[0][0], kFloatConstant);  // The write is visible through the buffer.
+
+ AudioFrameView<const float> const_view(  // Only constructed; never read afterwards.
+ buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
+ // Modification is not allowed.
+ // const_view.channel(0)[0] = kFloatConstant;
+
+ // Converting a non-const view into a const view is allowed.
+ AudioFrameView<const float> other_const_view = non_const_view;
+ static_cast<void>(other_const_view);  // Marks the variable as intentionally unused.
+
+ // But not the other way.
+ // non_const_view = other_const_view;
+
+ AudioFrameView<int16_t> non_const_int16_view(
+ buffer.channels(), buffer.num_channels(), buffer.num_frames());
+ non_const_int16_view.channel(0)[0] = kIntConstant;
+ EXPECT_EQ(buffer.channels()[0][0], kIntConstant);
+}
diff --git a/modules/audio_processing/include/audio_frame_view.h b/modules/audio_processing/include/audio_frame_view.h
index 86e593a..366fc32 100644
--- a/modules/audio_processing/include/audio_frame_view.h
+++ b/modules/audio_processing/include/audio_frame_view.h
@@ -27,6 +27,14 @@
num_channels_(num_channels),
channel_size_(channel_size) {}
+ // Implicit conversion to allow converting AudioFrameView<float> to
+ // AudioFrameView<const float>.
+ template <class U>
+ AudioFrameView(AudioFrameView<U> other)  // Taken by value: data() is a non-const member.
+ : audio_samples_(other.data()),
+ num_channels_(other.num_channels()),
+ channel_size_(other.samples_per_channel()) {}
+
AudioFrameView() = delete;
size_t num_channels() const { return num_channels_; }
@@ -45,6 +53,8 @@
return rtc::ArrayView<const T>(audio_samples_[idx], channel_size_);
}
+ T* const* data() { return audio_samples_; }  // Raw per-channel pointer array; read by the converting constructor.
+
private:
T* const* audio_samples_;
size_t num_channels_;