/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
#include <algorithm>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
constexpr int kHeadroomHistogramMin = 0;
constexpr int kHeadroomHistogramMax = 50;
// Maps the input speech level to the desired applied gain: we want to boost
// the signal so that peaks reach -kHeadroomDbfs, but we can't apply more
// than kMaxGainDb of gain.
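// For example, an input level 10 dB below -kHeadroomDbfs maps to a 10 dB
// gain (assuming kMaxGainDb is at least 10 dB), levels more than kMaxGainDb
// below -kHeadroomDbfs are capped at kMaxGainDb, and levels at or above
// -kHeadroomDbfs get no boost.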
float ComputeGainDb(float input_level_dbfs) {
// If the level is very low, boost it as much as we can.
if (input_level_dbfs < -(kHeadroomDbfs + kMaxGainDb)) {
return kMaxGainDb;
}
// We expect to end up here most of the time: the level is below
// -headroom, but we can boost it to -headroom.
if (input_level_dbfs < -kHeadroomDbfs) {
return -kHeadroomDbfs - input_level_dbfs;
}
// Otherwise, the level is too high and we can't boost.
RTC_DCHECK_GE(input_level_dbfs, -kHeadroomDbfs);
return 0.f;
}
// Returns `target_gain` if the output noise level is below
// `max_output_noise_level_dbfs`; otherwise returns a capped gain so that the
// output noise level equals `max_output_noise_level_dbfs`.
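// For example, with an input noise level of -70 dBFS and a maximum output
// noise level of -60 dBFS, at most 10 dB of gain is allowed.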
float LimitGainByNoise(float target_gain,
float input_noise_level_dbfs,
float max_output_noise_level_dbfs,
ApmDataDumper& apm_data_dumper) {
const float max_allowed_gain_db =
max_output_noise_level_dbfs - input_noise_level_dbfs;
apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
max_allowed_gain_db);
return std::min(target_gain, std::max(max_allowed_gain_db, 0.f));
}
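
// Caps `target_gain` when the speech level estimate is not confident and the
// limiter input level is above `kLimiterThresholdForAgcGainDbfs`, so that the
// estimated level before the gain plus the new gain does not exceed that
// threshold; otherwise returns `target_gain` unchanged.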
float LimitGainByLowConfidence(float target_gain,
float last_gain,
float limiter_audio_level_dbfs,
bool estimate_is_confident) {
if (estimate_is_confident ||
limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
return target_gain;
}
const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain;
// Compute a new gain so that `limiter_level_before_gain` + `new_target_gain`
// does not exceed `kLimiterThresholdForAgcGainDbfs`.
const float new_target_gain = std::max(
kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f);
return std::min(new_target_gain, target_gain);
}
// Computes how the gain should change during this frame.
// Returns the gain change in dB relative to `last_gain_db`.
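// For example, if the target gain is 2 dB above `last_gain_db` but only a
// 0.5 dB increase is allowed, 0.5 dB is returned; if gain increases are not
// allowed, the returned change is at most 0 dB.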
float ComputeGainChangeThisFrameDb(float target_gain_db,
float last_gain_db,
bool gain_increase_allowed,
float max_gain_decrease_db,
float max_gain_increase_db) {
RTC_DCHECK_GT(max_gain_decrease_db, 0);
RTC_DCHECK_GT(max_gain_increase_db, 0);
float target_gain_difference_db = target_gain_db - last_gain_db;
if (!gain_increase_allowed) {
target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
}
return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
max_gain_increase_db);
}
// Copies the (multichannel) audio samples from `src` into `dst`.
void CopyAudio(AudioFrameView<const float> src,
std::vector<std::vector<float>>& dst) {
RTC_DCHECK_GT(src.num_channels(), 0);
RTC_DCHECK_GT(src.samples_per_channel(), 0);
RTC_DCHECK_EQ(dst.size(), src.num_channels());
for (size_t c = 0; c < src.num_channels(); ++c) {
rtc::ArrayView<const float> channel_view = src.channel(c);
RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
}
}
} // namespace
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second,
float max_output_noise_level_dbfs,
bool dry_run)
: apm_data_dumper_(apm_data_dumper),
gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
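      // Convert the maximum gain change from dB per second to dB per 10 ms
      // frame (e.g., 3 dB/s corresponds to 0.03 dB per frame).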
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
kFrameDurationMs / 1000.f),
max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
dry_run_(dry_run),
calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f);
RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f);
Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1);
}
void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
int num_channels) {
if (!dry_run_) {
return;
}
RTC_DCHECK_GT(sample_rate_hz, 0);
RTC_DCHECK_GT(num_channels, 0);
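  // Number of samples per channel in a 10 ms frame.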
int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
bool sample_rate_changed =
dry_run_frame_.empty() || // Handle initialization.
dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
bool num_channels_changed =
dry_run_channels_.size() != static_cast<size_t>(num_channels);
if (sample_rate_changed || num_channels_changed) {
// Resize the multichannel audio vector and update the channel pointers.
dry_run_frame_.resize(num_channels);
dry_run_channels_.resize(num_channels);
for (int c = 0; c < num_channels; ++c) {
dry_run_frame_[c].resize(frame_size);
dry_run_channels_[c] = dry_run_frame_[c].data();
}
}
}
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
AudioFrameView<float> frame) {
RTC_DCHECK_GE(info.speech_level_dbfs, -150.f);
RTC_DCHECK_GE(frame.num_channels(), 1);
RTC_DCHECK(
frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
<< "`frame` does not look like a 10 ms frame for an APM supported sample "
"rate";
// Compute the input level used to select the desired gain.
RTC_DCHECK_GT(info.headroom_db, 0.0f);
const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
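  // E.g., a speech level of -35 dBFS with 10 dB of headroom gives an input
  // level of -25 dBFS, which is then mapped to a gain by `ComputeGainDb()`.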
const float target_gain_db = LimitGainByLowConfidence(
LimitGainByNoise(ComputeGainDb(input_level_dbfs), info.noise_rms_dbfs,
max_output_noise_level_dbfs_, *apm_data_dumper_),
last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
// Forbid increasing the gain until enough adjacent speech frames are
// observed.
bool first_confident_speech_frame = false;
if (info.speech_probability < kVadConfidenceThreshold) {
frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
} else if (frames_to_gain_increase_allowed_ > 0) {
frames_to_gain_increase_allowed_--;
first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
}
apm_data_dumper_->DumpRaw(
"agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
frames_to_gain_increase_allowed_);
const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
float max_gain_increase_db = max_gain_change_db_per_10ms_;
if (first_confident_speech_frame) {
// No gain increase happened while waiting for a long enough speech
// sequence. Therefore, temporarily allow a faster gain increase.
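    // For example, with a 12-frame threshold and a 0.03 dB per-frame limit
    // (illustrative values), up to 0.36 dB of increase is allowed here.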
RTC_DCHECK(gain_increase_allowed);
max_gain_increase_db *= adjacent_speech_frames_threshold_;
}
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
target_gain_db, last_gain_db_, gain_increase_allowed,
/*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
max_gain_increase_db);
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
target_gain_db - last_gain_db_);
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
gain_change_this_frame_db);
// Optimization: avoid calling math functions if gain does not
// change.
if (gain_change_this_frame_db != 0.f) {
gain_applier_.SetGainFactor(
DbToRatio(last_gain_db_ + gain_change_this_frame_db));
}
// Modify `frame` only if not running in "dry run" mode.
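  // In dry-run mode the adaptive gain is still computed and logged, but the
  // input audio is left unmodified: `ApplyGain()` runs on an internal copy.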
if (!dry_run_) {
gain_applier_.ApplyGain(frame);
} else {
    // Copy `frame` and call `ApplyGain()` on the copy so that the input
    // audio stays untouched.
CopyAudio(frame, dry_run_frame_);
RTC_DCHECK(!dry_run_channels_.empty());
AudioFrameView<float> frame_copy(&dry_run_channels_[0],
frame.num_channels(),
frame.samples_per_channel());
gain_applier_.ApplyGain(frame_copy);
}
  // Remember the gain applied to this frame for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
last_gain_db_);
  // Log statistics every 10 seconds (1000 10 ms frames).
calls_since_last_gain_log_++;
if (calls_since_last_gain_log_ == 1000) {
calls_since_last_gain_log_ = 0;
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
-info.speech_level_dbfs, 0, 100, 101);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-info.noise_rms_dbfs, 0, 100, 101);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
kHeadroomHistogramMax,
kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
<< " | speech_dbfs: " << info.speech_level_dbfs
<< " | noise_dbfs: " << info.noise_rms_dbfs
<< " | headroom_db: " << info.headroom_db
<< " | gain_db: " << last_gain_db_;
}
}
} // namespace webrtc