/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/ns/speech_probability_estimator.h"
#include <math.h>
#include <algorithm>
#include "modules/audio_processing/ns/fast_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
SpeechProbabilityEstimator::SpeechProbabilityEstimator() {
speech_probability_.fill(0.f);
}

void SpeechProbabilityEstimator::Update(
int32_t num_analyzed_frames,
rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
float signal_spectral_sum,
float signal_energy) {
// Update models.
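// During the initial startup phase the signal model's normalization is still
// being adjusted to the observed signal energy.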
if (num_analyzed_frames < kLongStartupPhaseBlocks) {
signal_model_estimator_.AdjustNormalization(num_analyzed_frames,
signal_energy);
}
signal_model_estimator_.Update(prior_snr, post_snr,
conservative_noise_spectrum, signal_spectrum,
signal_spectral_sum, signal_energy);
const SignalModel& model = signal_model_estimator_.get_model();
const PriorSignalModel& prior_model =
signal_model_estimator_.get_prior_model();
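// `model` holds the current feature values (average LRT, spectral flatness,
// spectral difference) while `prior_model` holds the thresholds and weights
// that they are compared against below.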
// Width parameter in sigmoid map for prior model.
constexpr float kWidthPrior0 = 4.f;
// Width for pause regions: the features lie in a lower range there, so a
// larger width is used in the tanh map.
constexpr float kWidthPrior1 = 2.f * kWidthPrior0;
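// Each feature is mapped to an indicator in [0, 1] with the sigmoid-shaped
// map 0.5 * (tanh(width * (feature - threshold)) + 1), oriented so that the
// indicator approaches 1 when the feature points towards speech and 0 when it
// points towards noise.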
// Average LRT feature: use larger width in tanh map for pause regions.
float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0;
// Compute indicator function: sigmoid map.
float indicator0 =
0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f);
// Spectral flatness feature: use larger width in tanh map for pause regions.
width_prior = model.spectral_flatness > prior_model.flatness_threshold
? kWidthPrior1
: kWidthPrior0;
// Compute indicator function: sigmoid map.
float indicator1 =
0.5f * (tanh(1.f * width_prior *
(prior_model.flatness_threshold - model.spectral_flatness)) +
1.f);
// Template spectrum-difference feature: use larger width in tanh map for
// pause regions.
width_prior = model.spectral_diff < prior_model.template_diff_threshold
? kWidthPrior1
: kWidthPrior0;
// Compute indicator function: sigmoid map.
float indicator2 =
0.5f * (tanh(width_prior * (model.spectral_diff -
prior_model.template_diff_threshold)) +
1.f);
// Combine the indicator function with the feature weights.
float ind_prior = prior_model.lrt_weighting * indicator0 +
prior_model.flatness_weighting * indicator1 +
prior_model.difference_weighting * indicator2;
// Compute the prior probability.
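// The combined indicator is averaged recursively: each frame moves the prior
// probability 10% of the way towards the current value of ind_prior.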
prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_);
// Constrain the prior probability to the range [0.01, 1].
prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f);
// Final speech probability: combine the prior model with the likelihood ratio
// factor.
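// With prior probability q = prior_speech_prob_ and per-bin likelihood ratio
// L[k] = exp(avg_log_lrt[k]), the posterior is
//   P(speech | bin k) = q * L[k] / (q * L[k] + 1 - q)
//                     = 1 / (1 + ((1 - q) / q) * exp(-avg_log_lrt[k])).
// ExpApproximationSignFlip() provides approximations of the
// exp(-avg_log_lrt[k]) terms, and the 0.0001f term safeguards the division
// for small priors.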
float gain_prior =
(1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f);
std::array<float, kFftSizeBy2Plus1> inv_lrt;
ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt);
for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]);
}
}

}  // namespace webrtc