modules/audio_processing/agc2/saturation_protector.cc - src - Git at Google

 /*
  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "modules/audio_processing/agc2/saturation_protector.h"

 #include <algorithm>
 #include <memory>

 #include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/agc2/saturation_protector_buffer.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/numerics/safe_compare.h"
 #include "rtc_base/numerics/safe_minmax.h"

 namespace webrtc {
 namespace {

 // Tuning constants used by `UpdateSaturationProtectorState()`.
 //
 // Accumulated time (ms) that must be exceeded before the running max peak is
 // pushed into the peak delay buffer.
 constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
 // Lower and upper bounds (dB) to which the headroom is clamped after every
 // update.
 constexpr float kMinMarginDb = 12.0f;
 constexpr float kMaxMarginDb = 25.0f;
 // Weight kept on the previous headroom when the observed peak-to-speech-level
 // difference is above the current headroom.
 constexpr float kAttack = 0.9988493699365052f;
 // Weight kept on the previous headroom otherwise.
 constexpr float kDecay = 0.9997697679981565f;

 // Everything the saturation protector tracks between frames. Kept as a
 // standalone struct (instead of members of `SaturationProtectorImpl`) so that
 // a whole state can be check-pointed and restored with a plain assignment.
 struct SaturationProtectorState {
   // Two states are equal only when every field compares equal.
   bool operator==(const SaturationProtectorState& other) const {
     if (!(headroom_db == other.headroom_db)) {
       return false;
     }
     if (!(peak_delay_buffer == other.peak_delay_buffer)) {
       return false;
     }
     if (!(max_peaks_dbfs == other.max_peaks_dbfs)) {
       return false;
     }
     return time_since_push_ms == other.time_since_push_ms;
   }
   bool operator!=(const SaturationProtectorState& other) const {
     return !operator==(other);
   }

   // Current headroom estimate (dB).
   float headroom_db;
   // Ring buffer from which the delayed peak is read.
   SaturationProtectorBuffer peak_delay_buffer;
   // Largest peak observed since the last push into `peak_delay_buffer`.
   float max_peaks_dbfs;
   // Time accumulated since the last push into `peak_delay_buffer`.
   int time_since_push_ms;
 };

 // Puts `state` back to its starting point: the peak delay buffer is reset, the
 // elapsed time is zeroed, the running max peak goes back to `kMinLevelDbfs`
 // and the headroom is set to `initial_headroom_db`.
 void ResetSaturationProtectorState(float initial_headroom_db,
                                    SaturationProtectorState& state) {
   state.peak_delay_buffer.Reset();
   state.time_since_push_ms = 0;
   state.max_peaks_dbfs = kMinLevelDbfs;
   state.headroom_db = initial_headroom_db;
 }

 // Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
 // and the peak level `peak_dbfs` for an observed frame. `state` must not be
 // modified without calling this function.
 void UpdateSaturationProtectorState(float peak_dbfs,
                                     float speech_level_dbfs,
                                     SaturationProtectorState& state) {
   // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
   state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
   state.time_since_push_ms += kFrameDurationMs;
   if (SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
     // Push `max_peaks_dbfs` back into the ring buffer.
     state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
     // Reset.
     state.max_peaks_dbfs = kMinLevelDbfs;
     state.time_since_push_ms = 0;
   }

   // Update the headroom by comparing the estimated speech level and the delayed
   // max speech peak.
   const float delayed_peak_dbfs =
       state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
   const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
   if (difference_db > state.headroom_db) {
     // Attack.
     state.headroom_db =
         state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
   } else {
     // Decay.
     state.headroom_db =
         state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
   }

   state.headroom_db =
       SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
 }

 // `SaturationProtector` implementation recommending a headroom based on the
 // recent peaks. A preliminary state is updated on every speech frame; a
 // reliable state is used as the check-point to fall back to when a speech
 // sequence turns out to be too short.
 class SaturationProtectorImpl : public SaturationProtector {
  public:
   // `apm_data_dumper` is stored as a raw pointer and dereferenced on every
   // `Analyze()` call, so it must be non-null for as long as this object is
   // used.
   explicit SaturationProtectorImpl(float initial_headroom_db,
                                    int adjacent_speech_frames_threshold,
                                    ApmDataDumper* apm_data_dumper)
       : apm_data_dumper_(apm_data_dumper),
         initial_headroom_db_(initial_headroom_db),
         adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
     Reset();
   }
   SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
   SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
   ~SaturationProtectorImpl() override = default;

   // Returns the most recently confirmed headroom.
   float HeadroomDb() override { return headroom_db_; }

   // Processes one frame. Frames whose `speech_probability` is below
   // `kVadConfidenceThreshold` are handled as non-speech, all others as speech.
   // Debug data is dumped after every frame.
   void Analyze(float speech_probability,
                float peak_dbfs,
                float speech_level_dbfs) override {
     if (speech_probability < kVadConfidenceThreshold) {
       OnNonSpeechFrame();
     } else {
       OnSpeechFrame(peak_dbfs, speech_level_dbfs);
     }
     DumpDebugData();
   }

   // Restores the initial headroom and clears the speech frame counter and
   // both states.
   void Reset() override {
     ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
     ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
     headroom_db_ = initial_headroom_db_;
     num_adjacent_speech_frames_ = 0;
   }

  private:
   // Ends the current speech sequence. When two or more adjacent speech frames
   // are required, the preliminary updates are either confirmed or discarded
   // depending on how long the sequence was.
   void OnNonSpeechFrame() {
     const int sequence_length = num_adjacent_speech_frames_;
     num_adjacent_speech_frames_ = 0;
     if (adjacent_speech_frames_threshold_ <= 1) {
       return;
     }
     if (sequence_length >= adjacent_speech_frames_threshold_) {
       // Long enough sequence: check-point the preliminary state.
       reliable_state_ = preliminary_state_;
     } else if (sequence_length > 0) {
       // Too short sequence: roll back to the last check-point.
       preliminary_state_ = reliable_state_;
     }
   }

   // Extends the current speech sequence, updates the preliminary state and,
   // once the sequence is long enough, publishes its headroom.
   void OnSpeechFrame(float peak_dbfs, float speech_level_dbfs) {
     ++num_adjacent_speech_frames_;
     UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
                                    preliminary_state_);
     if (num_adjacent_speech_frames_ < adjacent_speech_frames_threshold_) {
       return;
     }
     headroom_db_ = preliminary_state_.headroom_db;
   }

   // Dumps the running max peak of both states.
   void DumpDebugData() {
     const float preliminary_max_peak_dbfs = preliminary_state_.max_peaks_dbfs;
     const float reliable_max_peak_dbfs = reliable_state_.max_peaks_dbfs;
     apm_data_dumper_->DumpRaw(
         "agc2_saturation_protector_preliminary_max_peak_dbfs",
         preliminary_max_peak_dbfs);
     apm_data_dumper_->DumpRaw(
         "agc2_saturation_protector_reliable_max_peak_dbfs",
         reliable_max_peak_dbfs);
   }

   ApmDataDumper* const apm_data_dumper_;
   const float initial_headroom_db_;
   const int adjacent_speech_frames_threshold_;
   // Length of the speech sequence currently being observed.
   int num_adjacent_speech_frames_;
   // Value returned by `HeadroomDb()`.
   float headroom_db_;
   // Updated on every speech frame.
   SaturationProtectorState preliminary_state_;
   // Last check-point of `preliminary_state_`.
   SaturationProtectorState reliable_state_;
 };

 }  // namespace

 std::unique_ptr<SaturationProtector> CreateSaturationProtector(
     float initial_headroom_db,
     int adjacent_speech_frames_threshold,
     ApmDataDumper* apm_data_dumper) {
   return std::make_unique<SaturationProtectorImpl>(
       initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
 }

 }  // namespace webrtc
	/*
	* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include "modules/audio_processing/agc2/saturation_protector.h"

	#include <algorithm>
	#include <memory>

	#include "modules/audio_processing/agc2/agc2_common.h"
	#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
	#include "modules/audio_processing/logging/apm_data_dumper.h"
	#include "rtc_base/numerics/safe_compare.h"
	#include "rtc_base/numerics/safe_minmax.h"

	namespace webrtc {
	namespace {

	// Tuning constants used by `UpdateSaturationProtectorState()`.
	//
	// Accumulated time (ms) that must be exceeded before the running max peak is
	// pushed into the peak delay buffer.
	constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
	// Lower and upper bounds (dB) to which the headroom is clamped after every
	// update.
	constexpr float kMinMarginDb = 12.0f;
	constexpr float kMaxMarginDb = 25.0f;
	// Weight kept on the previous headroom when the observed peak-to-speech-level
	// difference is above the current headroom.
	constexpr float kAttack = 0.9988493699365052f;
	// Weight kept on the previous headroom otherwise.
	constexpr float kDecay = 0.9997697679981565f;

	// Everything the saturation protector tracks between frames. Kept as a
	// standalone struct (instead of members of `SaturationProtectorImpl`) so that
	// a whole state can be check-pointed and restored with a plain assignment.
	struct SaturationProtectorState {
	  // Two states are equal only when every field compares equal.
	  bool operator==(const SaturationProtectorState& other) const {
	    if (!(headroom_db == other.headroom_db)) {
	      return false;
	    }
	    if (!(peak_delay_buffer == other.peak_delay_buffer)) {
	      return false;
	    }
	    if (!(max_peaks_dbfs == other.max_peaks_dbfs)) {
	      return false;
	    }
	    return time_since_push_ms == other.time_since_push_ms;
	  }
	  bool operator!=(const SaturationProtectorState& other) const {
	    return !operator==(other);
	  }

	  // Current headroom estimate (dB).
	  float headroom_db;
	  // Ring buffer from which the delayed peak is read.
	  SaturationProtectorBuffer peak_delay_buffer;
	  // Largest peak observed since the last push into `peak_delay_buffer`.
	  float max_peaks_dbfs;
	  // Time accumulated since the last push into `peak_delay_buffer`.
	  int time_since_push_ms;
	};

	// Puts `state` back to its starting point: the peak delay buffer is reset, the
	// elapsed time is zeroed, the running max peak goes back to `kMinLevelDbfs`
	// and the headroom is set to `initial_headroom_db`.
	void ResetSaturationProtectorState(float initial_headroom_db,
	                                   SaturationProtectorState& state) {
	  state.peak_delay_buffer.Reset();
	  state.time_since_push_ms = 0;
	  state.max_peaks_dbfs = kMinLevelDbfs;
	  state.headroom_db = initial_headroom_db;
	}

	// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
	// and the peak level `peak_dbfs` for an observed frame. `state` must not be
	// modified without calling this function.
	void UpdateSaturationProtectorState(float peak_dbfs,
	float speech_level_dbfs,
	SaturationProtectorState& state) {
	// Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
	state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
	state.time_since_push_ms += kFrameDurationMs;
	if (SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
	// Push `max_peaks_dbfs` back into the ring buffer.
	state.peak_delay_buffer.PushBack(state.max_peaks_dbfs);
	// Reset.
	state.max_peaks_dbfs = kMinLevelDbfs;
	state.time_since_push_ms = 0;
	}

	// Update the headroom by comparing the estimated speech level and the delayed
	// max speech peak.
	const float delayed_peak_dbfs =
	state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
	const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
	if (difference_db > state.headroom_db) {
	// Attack.
	state.headroom_db =
	state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
	} else {
	// Decay.
	state.headroom_db =
	state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
	}

	state.headroom_db =
	SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
	}

	// `SaturationProtector` implementation recommending a headroom based on the
	// recent peaks. A preliminary state is updated on every speech frame; a
	// reliable state is used as the check-point to fall back to when a speech
	// sequence turns out to be too short.
	class SaturationProtectorImpl : public SaturationProtector {
	 public:
	  // `apm_data_dumper` is stored as a raw pointer and dereferenced on every
	  // `Analyze()` call, so it must be non-null for as long as this object is
	  // used.
	  explicit SaturationProtectorImpl(float initial_headroom_db,
	                                   int adjacent_speech_frames_threshold,
	                                   ApmDataDumper* apm_data_dumper)
	      : apm_data_dumper_(apm_data_dumper),
	        initial_headroom_db_(initial_headroom_db),
	        adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
	    Reset();
	  }
	  SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
	  SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
	  ~SaturationProtectorImpl() override = default;

	  // Returns the most recently confirmed headroom.
	  float HeadroomDb() override { return headroom_db_; }

	  // Processes one frame. Frames whose `speech_probability` is below
	  // `kVadConfidenceThreshold` are handled as non-speech, all others as speech.
	  // Debug data is dumped after every frame.
	  void Analyze(float speech_probability,
	               float peak_dbfs,
	               float speech_level_dbfs) override {
	    if (speech_probability < kVadConfidenceThreshold) {
	      OnNonSpeechFrame();
	    } else {
	      OnSpeechFrame(peak_dbfs, speech_level_dbfs);
	    }
	    DumpDebugData();
	  }

	  // Restores the initial headroom and clears the speech frame counter and
	  // both states.
	  void Reset() override {
	    ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
	    ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
	    headroom_db_ = initial_headroom_db_;
	    num_adjacent_speech_frames_ = 0;
	  }

	 private:
	  // Ends the current speech sequence. When two or more adjacent speech frames
	  // are required, the preliminary updates are either confirmed or discarded
	  // depending on how long the sequence was.
	  void OnNonSpeechFrame() {
	    const int sequence_length = num_adjacent_speech_frames_;
	    num_adjacent_speech_frames_ = 0;
	    if (adjacent_speech_frames_threshold_ <= 1) {
	      return;
	    }
	    if (sequence_length >= adjacent_speech_frames_threshold_) {
	      // Long enough sequence: check-point the preliminary state.
	      reliable_state_ = preliminary_state_;
	    } else if (sequence_length > 0) {
	      // Too short sequence: roll back to the last check-point.
	      preliminary_state_ = reliable_state_;
	    }
	  }

	  // Extends the current speech sequence, updates the preliminary state and,
	  // once the sequence is long enough, publishes its headroom.
	  void OnSpeechFrame(float peak_dbfs, float speech_level_dbfs) {
	    ++num_adjacent_speech_frames_;
	    UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
	                                   preliminary_state_);
	    if (num_adjacent_speech_frames_ < adjacent_speech_frames_threshold_) {
	      return;
	    }
	    headroom_db_ = preliminary_state_.headroom_db;
	  }

	  // Dumps the running max peak of both states.
	  void DumpDebugData() {
	    const float preliminary_max_peak_dbfs = preliminary_state_.max_peaks_dbfs;
	    const float reliable_max_peak_dbfs = reliable_state_.max_peaks_dbfs;
	    apm_data_dumper_->DumpRaw(
	        "agc2_saturation_protector_preliminary_max_peak_dbfs",
	        preliminary_max_peak_dbfs);
	    apm_data_dumper_->DumpRaw(
	        "agc2_saturation_protector_reliable_max_peak_dbfs",
	        reliable_max_peak_dbfs);
	  }

	  ApmDataDumper* const apm_data_dumper_;
	  const float initial_headroom_db_;
	  const int adjacent_speech_frames_threshold_;
	  // Length of the speech sequence currently being observed.
	  int num_adjacent_speech_frames_;
	  // Value returned by `HeadroomDb()`.
	  float headroom_db_;
	  // Updated on every speech frame.
	  SaturationProtectorState preliminary_state_;
	  // Last check-point of `preliminary_state_`.
	  SaturationProtectorState reliable_state_;
	};

	} // namespace

	std::unique_ptr<SaturationProtector> CreateSaturationProtector(
	float initial_headroom_db,
	int adjacent_speech_frames_threshold,
	ApmDataDumper* apm_data_dumper) {
	return std::make_unique<SaturationProtectorImpl>(
	initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
	}

	} // namespace webrtc