This CL adds the ability to supply the level controller with an
initial peak signal level (in dBFS), via
AudioProcessing::SetLevelControllerInitialLevel(), that is used
instead of the default initial signal level.
BUG=
Review-Url: https://codereview.webrtc.org/2254973003
Cr-Commit-Position: refs/heads/master@{#13931}
diff --git a/webrtc/media/engine/fakewebrtcvoiceengine.h b/webrtc/media/engine/fakewebrtcvoiceengine.h
index c8fb9cf..f27810c 100644
--- a/webrtc/media/engine/fakewebrtcvoiceengine.h
+++ b/webrtc/media/engine/fakewebrtcvoiceengine.h
@@ -93,6 +93,7 @@
WEBRTC_STUB_CONST(stream_delay_ms, ());
WEBRTC_BOOL_STUB_CONST(was_stream_delay_set, ());
WEBRTC_VOID_STUB(set_stream_key_pressed, (bool key_pressed));
+ WEBRTC_VOID_STUB(SetLevelControllerInitialLevel, (float level));
WEBRTC_VOID_STUB(set_delay_offset_ms, (int offset));
WEBRTC_STUB_CONST(delay_offset_ms, ());
WEBRTC_STUB(StartDebugRecording,
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 3b3a951..750bcb6 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -992,6 +992,11 @@
capture_.key_pressed = key_pressed;
}
+void AudioProcessingImpl::SetLevelControllerInitialLevel(float level) {
+ rtc::CritScope cs(&crit_capture_);
+ private_submodules_->level_controller->SetInitialLevel(level);
+}
+
void AudioProcessingImpl::set_delay_offset_ms(int offset) {
rtc::CritScope cs(&crit_capture_);
capture_.delay_offset_ms = offset;
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 4b9011d..5ba3638 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -82,6 +82,7 @@
void set_delay_offset_ms(int offset) override;
int delay_offset_ms() const override;
void set_stream_key_pressed(bool key_pressed) override;
+ void SetLevelControllerInitialLevel(float level) override;
// Render-side exclusive methods possibly running APM in a
// multi-threaded manner. Acquire the render lock.
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 09e5d5b..2f1ae99 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -410,6 +410,11 @@
// with this chunk of audio.
virtual void set_stream_key_pressed(bool key_pressed) = 0;
+ // Sets the initial peak level to use inside the level controller in order
+ // to compute the signal gain. The unit for the peak level is dBFS and
+ // the allowed range is [-100, 0].
+ virtual void SetLevelControllerInitialLevel(float level) = 0;
+
// Sets a delay |offset| in ms to add to the values passed in through
// set_stream_delay_ms(). May be positive or negative.
//
diff --git a/webrtc/modules/audio_processing/level_controller/gain_selector.cc b/webrtc/modules/audio_processing/level_controller/gain_selector.cc
index 2accd71..80d9c0f 100644
--- a/webrtc/modules/audio_processing/level_controller/gain_selector.cc
+++ b/webrtc/modules/audio_processing/level_controller/gain_selector.cc
@@ -42,10 +42,12 @@
float GainSelector::GetNewGain(float peak_level,
float noise_energy,
float saturating_gain,
+ bool gain_jumpstart,
SignalClassifier::SignalType signal_type) {
RTC_DCHECK_LT(0.f, peak_level);
- if (signal_type == SignalClassifier::SignalType::kHighlyNonStationary) {
+ if (signal_type == SignalClassifier::SignalType::kHighlyNonStationary ||
+ gain_jumpstart) {
highly_nonstationary_signal_hold_counter_ = 100;
} else {
highly_nonstationary_signal_hold_counter_ =
diff --git a/webrtc/modules/audio_processing/level_controller/gain_selector.h b/webrtc/modules/audio_processing/level_controller/gain_selector.h
index 3d00499..78b9101 100644
--- a/webrtc/modules/audio_processing/level_controller/gain_selector.h
+++ b/webrtc/modules/audio_processing/level_controller/gain_selector.h
@@ -24,6 +24,7 @@
float GetNewGain(float peak_level,
float noise_energy,
float saturating_gain,
+ bool gain_jumpstart,
SignalClassifier::SignalType signal_type);
private:
diff --git a/webrtc/modules/audio_processing/level_controller/level_controller.cc b/webrtc/modules/audio_processing/level_controller/level_controller.cc
index a9fed9b..07618e3 100644
--- a/webrtc/modules/audio_processing/level_controller/level_controller.cc
+++ b/webrtc/modules/audio_processing/level_controller/level_controller.cc
@@ -155,6 +155,11 @@
LevelController::~LevelController() {}
+void LevelController::SetInitialLevel(float level) {
+ peak_level_estimator_.SetInitialPeakLevel(level);
+ gain_jumpstart_ = true;
+}
+
void LevelController::Initialize(int sample_rate_hz) {
RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
@@ -206,8 +211,11 @@
float saturating_gain = saturating_gain_estimator_.GetGain();
// Compute the new gain to apply.
- last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy,
- saturating_gain, signal_type);
+ last_gain_ = gain_selector_.GetNewGain(
+ peak_level, noise_energy, saturating_gain, gain_jumpstart_, signal_type);
+
+ // Unflag the jumpstart of the gain as it should only happen once.
+ gain_jumpstart_ = false;
// Apply the gain to the signal.
int num_saturations = gain_applier_.Process(last_gain_, audio);
diff --git a/webrtc/modules/audio_processing/level_controller/level_controller.h b/webrtc/modules/audio_processing/level_controller/level_controller.h
index 3d203f9..1d7f174 100644
--- a/webrtc/modules/audio_processing/level_controller/level_controller.h
+++ b/webrtc/modules/audio_processing/level_controller/level_controller.h
@@ -38,6 +38,11 @@
void Process(AudioBuffer* audio);
float GetLastGain() { return last_gain_; }
+ // Sets the initial peak level to use inside the level controller in order
+ // to compute the signal gain. The unit for the peak level is dBFS and
+ // the allowed range is [-100, 0].
+ void SetInitialLevel(float level);
+
private:
class Metrics {
public:
@@ -71,6 +76,7 @@
float dc_level_[2];
float dc_forgetting_factor_;
float last_gain_;
+ bool gain_jumpstart_ = false;
RTC_DISALLOW_COPY_AND_ASSIGN(LevelController);
};
diff --git a/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc b/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc
index c470c2f..74d129f 100644
--- a/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc
+++ b/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc
@@ -12,6 +12,7 @@
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/array_view.h"
+#include "webrtc/base/optional.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/level_controller/level_controller.h"
@@ -27,9 +28,13 @@
// any errors.
void RunBitexactnessTest(int sample_rate_hz,
size_t num_channels,
+ rtc::Optional<float> initial_level,
rtc::ArrayView<const float> output_reference) {
LevelController level_controller;
level_controller.Initialize(sample_rate_hz);
+ if (initial_level) {
+ level_controller.SetInitialLevel(*initial_level);
+ }
int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
const StreamConfig capture_config(sample_rate_hz, num_channels, false);
@@ -71,19 +76,19 @@
TEST(LevelControlBitExactnessTest, DISABLED_Mono8kHz) {
const float kOutputReference[] = {-0.013939f, -0.012154f, -0.009054f};
RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
}
TEST(LevelControlBitExactnessTest, DISABLED_Mono16kHz) {
const float kOutputReference[] = {-0.013706f, -0.013215f, -0.013018f};
RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
}
TEST(LevelControlBitExactnessTest, DISABLED_Mono32kHz) {
const float kOutputReference[] = {-0.014495f, -0.016425f, -0.016085f};
RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
}
// TODO(peah): Investigate why this particular testcase differ between Android
@@ -96,35 +101,41 @@
const float kOutputReference[] = {-0.015949f, -0.016957f, -0.019478f};
#endif
RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
}
TEST(LevelControlBitExactnessTest, DISABLED_Stereo8kHz) {
const float kOutputReference[] = {-0.014063f, -0.008450f, -0.012159f,
-0.051967f, -0.023202f, -0.047858f};
RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
}
TEST(LevelControlBitExactnessTest, DISABLED_Stereo16kHz) {
const float kOutputReference[] = {-0.012714f, -0.005896f, -0.012220f,
-0.053306f, -0.024549f, -0.051527f};
RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
}
TEST(LevelControlBitExactnessTest, DISABLED_Stereo32kHz) {
const float kOutputReference[] = {-0.011737f, -0.007018f, -0.013446f,
-0.053505f, -0.026292f, -0.056221f};
RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
}
TEST(LevelControlBitExactnessTest, DISABLED_Stereo48kHz) {
const float kOutputReference[] = {-0.010643f, -0.006334f, -0.011377f,
-0.049088f, -0.023600f, -0.050465f};
RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2,
- kOutputReference);
+ rtc::Optional<float>(), kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, DISABLED_MonoInitial48kHz) {
+ const float kOutputReference[] = {-0.013753f, -0.014623f, -0.016797f};
+ RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1,
+ rtc::Optional<float>(2000), kOutputReference);
}
diff --git a/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc
index 2ba806c..9d4fe33 100644
--- a/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc
+++ b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc
@@ -13,10 +13,14 @@
#include <algorithm>
#include "webrtc/modules/audio_processing/audio_buffer.h"
-#include "webrtc/modules/audio_processing/level_controller/lc_constants.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
namespace webrtc {
+namespace {
+
+const float kMinLevel = 30.f;
+
+} // namespace
PeakLevelEstimator::PeakLevelEstimator() {
Initialize();
@@ -25,15 +29,26 @@
PeakLevelEstimator::~PeakLevelEstimator() {}
void PeakLevelEstimator::Initialize() {
- peak_level_ = kTargetLcPeakLevel;
+ peak_level_ = initial_peak_level_;
hold_counter_ = 0;
initialization_phase_ = true;
}
+void PeakLevelEstimator::SetInitialPeakLevel(float level) {
+ RTC_DCHECK_LE(-100.f, level);
+ RTC_DCHECK_GE(0.f, level);
+
+ float linear_level = std::pow(10.f, level / 20.f) * 32768.f;
+
+ // Limit the supplied level to the level range used internally.
+ initial_peak_level_ = std::max(linear_level, kMinLevel);
+ Initialize();
+}
+
float PeakLevelEstimator::Analyze(SignalClassifier::SignalType signal_type,
float frame_peak_level) {
if (frame_peak_level == 0) {
- RTC_DCHECK_LE(30.f, peak_level_);
+ RTC_DCHECK_LE(kMinLevel, peak_level_);
return peak_level_;
}
@@ -57,7 +72,7 @@
}
}
- peak_level_ = std::max(peak_level_, 30.f);
+ peak_level_ = std::max(peak_level_, kMinLevel);
return peak_level_;
}
diff --git a/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h
index 270bbc3..941dd36 100644
--- a/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h
+++ b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h
@@ -12,6 +12,7 @@
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_
#include "webrtc/base/constructormagic.h"
+#include "webrtc/modules/audio_processing/level_controller/lc_constants.h"
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
namespace webrtc {
@@ -23,9 +24,11 @@
void Initialize();
float Analyze(SignalClassifier::SignalType signal_type,
float frame_peak_level);
+ void SetInitialPeakLevel(float level);
private:
float peak_level_;
+ float initial_peak_level_ = kTargetLcPeakLevel;
int hold_counter_;
bool initialization_phase_;