/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
#include <algorithm>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
constexpr int kHeadroomHistogramMin = 0;
constexpr int kHeadroomHistogramMax = 50;
// Maps the input speech level to the desired applied gain: we want to boost
// the signal so that peaks reach -kHeadroomDbfs, but we can't apply more
// than kMaxGainDb of gain.
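// For example, an input level 10 dB below -kHeadroomDbfs maps to a 10 dB
// gain (assuming kMaxGainDb is at least 10 dB), levels more than kMaxGainDb
// below -kHeadroomDbfs are capped at kMaxGainDb, and levels at or above
// -kHeadroomDbfs get no boost.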
float ComputeGainDb(float input_level_dbfs) {
// If the level is very low, boost it as much as we can.
if (input_level_dbfs < -(kHeadroomDbfs + kMaxGainDb)) {
return kMaxGainDb;
}
// We expect to end up here most of the time: the level is below
// -headroom, but we can boost it to -headroom.
if (input_level_dbfs < -kHeadroomDbfs) {
return -kHeadroomDbfs - input_level_dbfs;
}
// Otherwise, the level is too high and we can't boost.
RTC_DCHECK_GE(input_level_dbfs, -kHeadroomDbfs);
return 0.f;
}
// Returns `target_gain` if the output noise level is below
// `max_output_noise_level_dbfs`; otherwise returns a capped gain so that the
// output noise level equals `max_output_noise_level_dbfs`.
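// For example, with an input noise level of -70 dBFS and a maximum output
// noise level of -60 dBFS, at most 10 dB of gain is allowed.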
float LimitGainByNoise(float target_gain,
float input_noise_level_dbfs,
float max_output_noise_level_dbfs,
ApmDataDumper& apm_data_dumper) {
const float max_allowed_gain_db =
max_output_noise_level_dbfs - input_noise_level_dbfs;
apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
max_allowed_gain_db);
return std::min(target_gain, std::max(max_allowed_gain_db, 0.f));
}
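
// Caps `target_gain` when the speech level estimate is not confident and the
// limiter input level is above `kLimiterThresholdForAgcGainDbfs`, so that the
// estimated level before the gain plus the new gain does not exceed that
// threshold; otherwise returns `target_gain` unchanged.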
float LimitGainByLowConfidence(float target_gain,
float last_gain,
float limiter_audio_level_dbfs,
bool estimate_is_confident) {
if (estimate_is_confident ||
limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
return target_gain;
}
const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain;
// Compute a new gain so that `limiter_level_before_gain` + `new_target_gain`
// does not exceed `kLimiterThresholdForAgcGainDbfs`.
const float new_target_gain = std::max(
kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f);
return std::min(new_target_gain, target_gain);
}
// Computes how the gain should change during this frame.
// Returns the gain change in dB relative to `last_gain_db`.
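// For example, if the target gain is 2 dB above `last_gain_db` but only a
// 0.5 dB increase is allowed, 0.5 dB is returned; if gain increases are not
// allowed, the returned change is at most 0 dB.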
float ComputeGainChangeThisFrameDb(float target_gain_db,
float last_gain_db,
bool gain_increase_allowed,
float max_gain_decrease_db,
float max_gain_increase_db) {
RTC_DCHECK_GT(max_gain_decrease_db, 0);
RTC_DCHECK_GT(max_gain_increase_db, 0);
float target_gain_difference_db = target_gain_db - last_gain_db;
if (!gain_increase_allowed) {
target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
}
return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
max_gain_increase_db);
}
// Copies the (multichannel) audio samples from `src` into `dst`.
void CopyAudio(AudioFrameView<const float> src,
std::vector<std::vector<float>>& dst) {
RTC_DCHECK_GT(src.num_channels(), 0);
RTC_DCHECK_GT(src.samples_per_channel(), 0);
RTC_DCHECK_EQ(dst.size(), src.num_channels());
for (size_t c = 0; c < src.num_channels(); ++c) {
rtc::ArrayView<const float> channel_view = src.channel(c);
RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
}
}
} // namespace
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second,
float max_output_noise_level_dbfs,
bool dry_run)
: apm_data_dumper_(apm_data_dumper),
gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
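      // Convert the maximum gain change from dB per second to dB per 10 ms
      // frame (e.g., 3 dB/s corresponds to 0.03 dB per frame).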
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
kFrameDurationMs / 1000.f),
max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
dry_run_(dry_run),
calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f);
RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f);
Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1);
}
void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
int num_channels) {
if (!dry_run_) {
return;
}
RTC_DCHECK_GT(sample_rate_hz, 0);
RTC_DCHECK_GT(num_channels, 0);
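  // Number of samples per channel in a 10 ms frame.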
int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
bool sample_rate_changed =
dry_run_frame_.empty() || // Handle initialization.
dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
bool num_channels_changed =
dry_run_channels_.size() != static_cast<size_t>(num_channels);
if (sample_rate_changed || num_channels_changed) {
// Resize the multichannel audio vector and update the channel pointers.
dry_run_frame_.resize(num_channels);
dry_run_channels_.resize(num_channels);
for (int c = 0; c < num_channels; ++c) {
dry_run_frame_[c].resize(frame_size);
dry_run_channels_[c] = dry_run_frame_[c].data();
}
}
}
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
AudioFrameView<float> frame) {
RTC_DCHECK_GE(info.speech_level_dbfs, -150.f);
RTC_DCHECK_GE(frame.num_channels(), 1);
RTC_DCHECK(
frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
<< "`frame` does not look like a 10 ms frame for an APM supported sample "
"rate";
// Compute the input level used to select the desired gain.
RTC_DCHECK_GT(info.headroom_db, 0.0f);
const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
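  // E.g., a speech level of -35 dBFS with 10 dB of headroom gives an input
  // level of -25 dBFS, which is then mapped to a gain by `ComputeGainDb()`.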
const float target_gain_db = LimitGainByLowConfidence(
LimitGainByNoise(ComputeGainDb(input_level_dbfs), info.noise_rms_dbfs,
max_output_noise_level_dbfs_, *apm_data_dumper_),
last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
// Forbid increasing the gain until enough adjacent speech frames are
// observed.
bool first_confident_speech_frame = false;
if (info.speech_probability < kVadConfidenceThreshold) {
frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
} else if (frames_to_gain_increase_allowed_ > 0) {
frames_to_gain_increase_allowed_--;
first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
}
apm_data_dumper_->DumpRaw(
"agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
frames_to_gain_increase_allowed_);
const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
float max_gain_increase_db = max_gain_change_db_per_10ms_;
if (first_confident_speech_frame) {
// No gain increase happened while waiting for a long enough speech
// sequence. Therefore, temporarily allow a faster gain increase.
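    // For example, with a 12-frame threshold and a 0.03 dB per-frame limit
    // (illustrative values), up to 0.36 dB of increase is allowed here.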
RTC_DCHECK(gain_increase_allowed);
max_gain_increase_db *= adjacent_speech_frames_threshold_;
}
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
target_gain_db, last_gain_db_, gain_increase_allowed,
/*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
max_gain_increase_db);
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
target_gain_db - last_gain_db_);
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
gain_change_this_frame_db);
// Optimization: avoid calling math functions if gain does not
// change.
if (gain_change_this_frame_db != 0.f) {
gain_applier_.SetGainFactor(
DbToRatio(last_gain_db_ + gain_change_this_frame_db));
}
// Modify `frame` only if not running in "dry run" mode.
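  // In dry-run mode the adaptive gain is still computed and logged, but the
  // input audio is left unmodified: `ApplyGain()` runs on an internal copy.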
if (!dry_run_) {
gain_applier_.ApplyGain(frame);
} else {
    // Copy `frame` and call `ApplyGain()` on the copy so that the input
    // audio stays untouched.
CopyAudio(frame, dry_run_frame_);
RTC_DCHECK(!dry_run_channels_.empty());
AudioFrameView<float> frame_copy(&dry_run_channels_[0],
frame.num_channels(),
frame.samples_per_channel());
gain_applier_.ApplyGain(frame_copy);
}
  // Remember the gain applied to this frame for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
last_gain_db_);
  // Log statistics every 10 seconds (1000 10 ms frames).
calls_since_last_gain_log_++;
if (calls_since_last_gain_log_ == 1000) {
calls_since_last_gain_log_ = 0;
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
-info.speech_level_dbfs, 0, 100, 101);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-info.noise_rms_dbfs, 0, 100, 101);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
kHeadroomHistogramMax,
kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
<< " | speech_dbfs: " << info.speech_level_dbfs
<< " | noise_dbfs: " << info.noise_rms_dbfs
<< " | headroom_db: " << info.headroom_db
<< " | gain_db: " << last_gain_db_;
}
}
} // namespace webrtc