AGC2: max adaptation speed now part of config
Tested: bit-exactness verified with audioproc_f
Bug: webrtc:7494
Change-Id: Ie65a2e2139cff0bd730307d06b74760e307c9568
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186264
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32277}
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 9349436..380c39c 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@@ -28,11 +28,16 @@
dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
}
+constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
+constexpr float kMaxGainChangePerSecondDb = 3.f;
+
} // namespace
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
: speech_level_estimator_(apm_data_dumper),
- gain_applier_(apm_data_dumper),
+ gain_applier_(apm_data_dumper,
+ kGainApplierAdjacentSpeechFramesThreshold,
+ kMaxGainChangePerSecondDb),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper);
@@ -48,9 +53,10 @@
config.adaptive_digital.initial_saturation_margin_db,
config.adaptive_digital.extra_saturation_margin_db),
vad_(config.adaptive_digital.vad_probability_attack),
- gain_applier_(apm_data_dumper,
- config.adaptive_digital
- .gain_applier_adjacent_speech_frames_threshold),
+ gain_applier_(
+ apm_data_dumper,
+ config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
+ config.adaptive_digital.max_gain_change_db_per_second),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper);
diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h
index e2decb8..f3c7854 100644
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h
@@ -26,6 +26,7 @@
class AdaptiveAgc {
public:
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
+ // TODO(crbug.com/webrtc/7494): Remove ctor above.
AdaptiveAgc(ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2& config);
~AdaptiveAgc();
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
index e4e7886..ef048e6 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@@ -74,35 +74,33 @@
// Return the gain difference in db to 'last_gain_db'.
float ComputeGainChangeThisFrameDb(float target_gain_db,
float last_gain_db,
- bool gain_increase_allowed) {
+ bool gain_increase_allowed,
+ float max_gain_change_db) {
float target_gain_difference_db = target_gain_db - last_gain_db;
if (!gain_increase_allowed) {
target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
}
-
- return rtc::SafeClamp(target_gain_difference_db, -kMaxGainChangePerFrameDb,
- kMaxGainChangePerFrameDb);
+ return rtc::SafeClamp(target_gain_difference_db, -max_gain_change_db,
+ max_gain_change_db);
}
-} // namespace
-// TODO(crbug.com/webrtc/7494): Remove ctor and the constant used below.
-AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
- ApmDataDumper* apm_data_dumper)
- : AdaptiveDigitalGainApplier(
- apm_data_dumper,
- kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold) {}
+} // namespace
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper,
- int adjacent_speech_frames_threshold)
+ int adjacent_speech_frames_threshold,
+ float max_gain_change_db_per_second)
: apm_data_dumper_(apm_data_dumper),
gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
+ max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
+ kFrameDurationMs / 1000.f),
calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
+ RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
}
@@ -110,7 +108,11 @@
AudioFrameView<float> frame) {
RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
RTC_DCHECK_GE(frame.num_channels(), 1);
- RTC_DCHECK_GE(frame.samples_per_channel(), 1);
+ RTC_DCHECK(
+ frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
+ frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
+ << "`frame` does not look like a 10 ms frame for an APM supported sample "
+ "rate";
// Log every second.
calls_since_last_gain_log_++;
@@ -137,7 +139,8 @@
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
target_gain_db, last_gain_db_,
- /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0);
+ /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0,
+ max_gain_change_db_per_10ms_);
apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db",
target_gain_db - last_gain_db_);
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index ad3f39c..ca36abc 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@@ -34,16 +34,18 @@
bool estimate_is_confident;
};
- explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
- // Ctor. `adjacent_speech_frames_threshold` indicates how many speech frames
- // are required before a gain increase is allowed.
+ // `adjacent_speech_frames_threshold` indicates how many speech frames are
+ // required before a gain increase is allowed. `max_gain_change_db_per_second`
+ // limits the adaptation speed (the limit is applied uniformly to every frame).
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
- int adjacent_speech_frames_threshold);
+ int adjacent_speech_frames_threshold,
+ float max_gain_change_db_per_second);
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
delete;
- // Analyzes `info`, updates the digital gain and applies it to `frame`.
+ // Analyzes `info`, updates the digital gain and applies it to a 10 ms
+ // `frame`. Supports any sample rate supported by APM.
void Process(const FrameInfo& info, AudioFrameView<float> frame);
private:
@@ -51,6 +53,7 @@
GainApplier gain_applier_;
const int adjacent_speech_frames_threshold_;
+ const float max_gain_change_db_per_10ms_;
int calls_since_last_gain_log_;
int frames_to_gain_increase_allowed_;
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
index c8fb6ca..63763c8 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@@ -23,6 +23,7 @@
constexpr int kMono = 1;
constexpr int kStereo = 2;
+constexpr int kFrameLen10ms8kHz = 80;
constexpr int kFrameLen10ms48kHz = 480;
// Constants used in place of estimated noise levels.
@@ -32,6 +33,21 @@
"");
constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};
+constexpr float kMaxGainChangePerSecondDb = 3.f;
+constexpr float kMaxGainChangePerFrameDb =
+ kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
+
+// Helper to instantiate `AdaptiveDigitalGainApplier`.
+struct GainApplierHelper {
+ GainApplierHelper()
+ : apm_data_dumper(0),
+ gain_applier(&apm_data_dumper,
+ /*adjacent_speech_frames_threshold=*/1,
+ kMaxGainChangePerSecondDb) {}
+ ApmDataDumper apm_data_dumper;
+ AdaptiveDigitalGainApplier gain_applier;
+};
+
// Runs gain applier and returns the applied gain in linear scale.
float RunOnConstantLevel(int num_iterations,
VadLevelAnalyzer::Result vad_level,
@@ -40,7 +56,7 @@
float gain_linear = 0.f;
for (int i = 0; i < num_iterations; ++i) {
- VectorFloatFrame fake_audio(kMono, 1, 1.f);
+ VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info;
info.input_level_dbfs = input_level_dbfs;
info.input_noise_level_dbfs = kNoNoiseDbfs;
@@ -62,25 +78,22 @@
/*estimate_is_confident=*/true};
TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
-
+ GainApplierHelper helper;
// Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -5.0;
- gain_applier.Process(kFrameInfo, fake_audio.float_frame_view());
+ helper.gain_applier.Process(info, fake_audio.float_frame_view());
}
// Check that the output is -kHeadroom dBFS.
TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+ GainApplierHelper helper;
constexpr float initial_level_dbfs = -5.f;
- const float applied_gain =
- RunOnConstantLevel(200, kVadSpeech, initial_level_dbfs, &gain_applier);
+ const float applied_gain = RunOnConstantLevel(
+ 200, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs),
0.1f);
@@ -88,8 +101,7 @@
// Check that the output is -kHeadroom dBFS
TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+ GainApplierHelper helper;
constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f;
// A few extra frames for safety.
@@ -97,7 +109,7 @@
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
const float applied_gain = RunOnConstantLevel(
- kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &gain_applier);
+ kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f);
const float applied_gain_db = 20.f * std::log10(applied_gain);
@@ -105,8 +117,7 @@
}
TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+ GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
// A few extra frames for safety.
@@ -118,10 +129,10 @@
float last_gain_linear = 1.f;
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
- VectorFloatFrame fake_audio(kMono, 1, 1.f);
+ VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
- gain_applier.Process(info, fake_audio.float_frame_view());
+ helper.gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@@ -131,10 +142,10 @@
// Check that the same is true when gain decreases as well.
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
- VectorFloatFrame fake_audio(kMono, 1, 1.f);
+ VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 0.f;
- gain_applier.Process(info, fake_audio.float_frame_view());
+ helper.gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@@ -143,15 +154,14 @@
}
TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+ GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
- gain_applier.Process(info, fake_audio.float_frame_view());
+ helper.gain_applier.Process(info, fake_audio.float_frame_view());
float maximal_difference = 0.f;
float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@@ -168,8 +178,7 @@
}
TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+ GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr int num_initial_frames =
@@ -183,7 +192,7 @@
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
info.input_noise_level_dbfs = kWithNoiseDbfs;
- gain_applier.Process(info, fake_audio.float_frame_view());
+ helper.gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@@ -197,19 +206,17 @@
}
TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+ GainApplierHelper helper;
// Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 5.f;
- gain_applier.Process(info, fake_audio.float_frame_view());
+ helper.gain_applier.Process(info, fake_audio.float_frame_view());
}
TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
- ApmDataDumper apm_data_dumper(0);
- AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+ GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr int num_initial_frames =
@@ -224,7 +231,7 @@
info.input_level_dbfs = initial_level_dbfs;
info.limiter_envelope_dbfs = 1.f;
info.estimate_is_confident = false;
- gain_applier.Process(info, fake_audio.float_frame_view());
+ helper.gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@@ -247,7 +254,8 @@
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
- adjacent_speech_frames_threshold);
+ adjacent_speech_frames_threshold,
+ kMaxGainChangePerSecondDb);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;
@@ -268,7 +276,8 @@
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
- adjacent_speech_frames_threshold);
+ adjacent_speech_frames_threshold,
+ kMaxGainChangePerSecondDb);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 30880e2..5149ed6 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -26,9 +26,6 @@
constexpr float kAttackFilterConstant = 0.f;
// Adaptive digital gain applier settings below.
-constexpr float kMaxGainChangePerSecondDb = 3.f;
-constexpr float kMaxGainChangePerFrameDb =
- kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
constexpr float kHeadroomDbfs = 1.f;
constexpr float kMaxGainDb = 30.f;
constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
@@ -51,7 +48,6 @@
// Robust VAD probability and speech decisions.
constexpr float kDefaultSmoothedVadProbabilityAttack = 1.f;
-constexpr int kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold = 1;
constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 1;
// Saturation Protector settings.
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index e96a45b..a56b315 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -353,6 +353,7 @@
float initial_saturation_margin_db = 20.f;
float extra_saturation_margin_db = 2.f;
int gain_applier_adjacent_speech_frames_threshold = 1;
+ float max_gain_change_db_per_second = 3.f;
} adaptive_digital;
} gain_controller2;