modules/audio_coding/neteq/background_noise.cc - src/webrtc - Git at Google

 /*
  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "webrtc/modules/audio_coding/neteq/background_noise.h"

 #include <assert.h>
 #include <string.h>  // memcpy

 #include <algorithm>  // min, max

 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 #include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
 #include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"

 namespace webrtc {

 // Creates background noise state for |num_channels| channels. The mode
 // starts out as kBgnOn and every channel is put into its reset state.
 BackgroundNoise::BackgroundNoise(size_t num_channels)
     : num_channels_(num_channels),
       channel_parameters_(new ChannelParameters[num_channels]),
       mode_(kBgnOn) {
   // Reset() clears |initialized_| and resets each channel's parameters.
   Reset();
 }

 // No explicit cleanup is performed here.
 // NOTE(review): |channel_parameters_| is allocated with new[] in the
 // constructor; presumably it is an owning smart pointer declared in the
 // header, so nothing has to be released manually - confirm.
 BackgroundNoise::~BackgroundNoise() {
 }

 void BackgroundNoise::Reset() {
   initialized_ = false;
   for (size_t channel = 0; channel < num_channels_; ++channel) {
     channel_parameters_[channel].Reset();
   }
   // Keep _bgnMode as it is.
 }

 void BackgroundNoise::Update(const AudioMultiVector& input,
                              const PostDecodeVad& vad) {
   if (vad.running() && vad.active_speech()) {
     // Do not update the background noise parameters if we know that the signal
     // is active speech.
     return;
   }

   int32_t auto_correlation[kMaxLpcOrder + 1];
   int16_t fiter_output[kMaxLpcOrder + kResidualLength];
   int16_t reflection_coefficients[kMaxLpcOrder];
   int16_t lpc_coefficients[kMaxLpcOrder + 1];

   for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
     ChannelParameters& parameters = channel_parameters_[channel_ix];
     int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
     int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
     memcpy(temp_signal,
            &input[channel_ix][input.Size() - kVecLen],
            sizeof(int16_t) * kVecLen);

     int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen,
                                                      auto_correlation);

     if ((!vad.running() &&
         sample_energy < parameters.energy_update_threshold) ||
         (vad.running() && !vad.active_speech())) {
       // Generate LPC coefficients.
       if (auto_correlation[0] > 0) {
         // Regardless of whether the filter is actually updated or not,
         // update energy threshold levels, since we have in fact observed
         // a low energy signal.
         if (sample_energy < parameters.energy_update_threshold) {
           // Never go under 1.0 in average sample energy.
           parameters.energy_update_threshold = std::max(sample_energy, 1);
           parameters.low_energy_update_threshold = 0;
         }

         // Only update BGN if filter is stable, i.e., if return value from
         // Levinson-Durbin function is 1.
         if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients,
                                      reflection_coefficients,
                                      kMaxLpcOrder) != 1) {
           return;
         }
       } else {
         // Center value in auto-correlation is not positive. Do not update.
         return;
       }

       // Generate the CNG gain factor by looking at the energy of the residual.
       WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
                                 fiter_output, lpc_coefficients,
                                 kMaxLpcOrder + 1, kResidualLength);
       int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output,
                                                               fiter_output,
                                                               kResidualLength,
                                                               0);

       // Check spectral flatness.
       // Comparing the residual variance with the input signal variance tells
       // if the spectrum is flat or not.
       // If 20 * residual_energy >= sample_energy << 6, the spectrum is flat
       // enough.  Also ensure that the energy is non-zero.
       if ((residual_energy * 20 >= (sample_energy << 6)) &&
           (sample_energy > 0)) {
         // Spectrum is flat enough; save filter parameters.
         // |temp_signal| + |kVecLen| - |kMaxLpcOrder| points at the first of the
         // |kMaxLpcOrder| samples in the residual signal, which will form the
         // filter state for the next noise generation.
         SaveParameters(channel_ix, lpc_coefficients,
                        temp_signal + kVecLen - kMaxLpcOrder, sample_energy,
                        residual_energy);
       }
     } else {
       // Will only happen if post-decode VAD is disabled and |sample_energy| is
       // not low enough. Increase the threshold for update so that it increases
       // by a factor 4 in 4 seconds.
       IncrementEnergyThreshold(channel_ix, sample_energy);
     }
   }
   return;
 }

 int32_t BackgroundNoise::Energy(size_t channel) const {
   assert(channel < num_channels_);
   return channel_parameters_[channel].energy;
 }

 void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) {
   assert(channel < num_channels_);
   channel_parameters_[channel].mute_factor = value;
 }

 int16_t BackgroundNoise::MuteFactor(size_t channel) const {
   assert(channel < num_channels_);
   return channel_parameters_[channel].mute_factor;
 }

 const int16_t* BackgroundNoise::Filter(size_t channel) const {
   assert(channel < num_channels_);
   return channel_parameters_[channel].filter;
 }

 const int16_t* BackgroundNoise::FilterState(size_t channel) const {
   assert(channel < num_channels_);
   return channel_parameters_[channel].filter_state;
 }

 void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input,
                                      size_t length) {
   assert(channel < num_channels_);
   length = std::min(length, static_cast<size_t>(kMaxLpcOrder));
   memcpy(channel_parameters_[channel].filter_state, input,
          length * sizeof(int16_t));
 }

 int16_t BackgroundNoise::Scale(size_t channel) const {
   assert(channel < num_channels_);
   return channel_parameters_[channel].scale;
 }
 int16_t BackgroundNoise::ScaleShift(size_t channel) const {
   assert(channel < num_channels_);
   return channel_parameters_[channel].scale_shift;
 }

 // Writes |kMaxLpcOrder| + 1 correlation values of |signal| (|length| samples)
 // with itself into |auto_correlation|, and returns the first value shifted
 // down so that it represents energy per sample.
 // NOTE(review): a step of -1 is passed to the cross-correlation, which
 // presumably makes it read samples located before |signal|; the caller must
 // then provide valid history ahead of the pointer - confirm.
 int32_t BackgroundNoise::CalculateAutoCorrelation(
     const int16_t* signal, int length, int32_t* auto_correlation) const {
   // Derive the scaling passed to the correlation from the normalization
   // headroom of the squared peak sample; it is never allowed to be negative.
   const int16_t peak = WebRtcSpl_MaxAbsValueW16(signal, length);
   const int unclamped_scale = kLogVecLen - WebRtcSpl_NormW32(peak * peak);
   const int correlation_scale = std::max(0, unclamped_scale);

   const int kCorrelationStep = -1;
   WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal, length,
                              kMaxLpcOrder + 1, correlation_scale,
                              kCorrelationStep);

   // Remaining number of shifts needed to normalize energy to energy/sample.
   return auto_correlation[0] >> (kLogVecLen - correlation_scale);
 }

 // Raises the energy update threshold of |channel| and refreshes the tracked
 // maximum energy using |sample_energy|.
 //
 // The threshold is kept in two parts: |energy_update_threshold| and
 // |low_energy_update_threshold|, where the latter is masked to its lowest 16
 // bits after each update and its overflow above 16 bits is carried into the
 // former. |channel| must be smaller than the number of channels.
 void BackgroundNoise::IncrementEnergyThreshold(size_t channel,
                                                int32_t sample_energy) {
   // TODO(hlundin): Simplify the below threshold update. What this code
   // does is simply "threshold += (increment * threshold) >> 16", but due
   // to the limited-width operations, it is not exactly the same. The
   // difference should be inaudible, but bit-exactness would not be
   // maintained.
   assert(channel < num_channels_);
   ChannelParameters& parameters = channel_parameters_[channel];
   // Accumulate the increment contributed by the low part and by the two
   // lowest bytes of |energy_update_threshold|.
   int32_t temp_energy =
       WEBRTC_SPL_MUL_16_16_RSFT(kThresholdIncrement,
                                 parameters.low_energy_update_threshold, 16);
   temp_energy += kThresholdIncrement *
       (parameters.energy_update_threshold & 0xFF);
   temp_energy += (kThresholdIncrement *
       ((parameters.energy_update_threshold>>8) & 0xFF)) << 8;
   parameters.low_energy_update_threshold += temp_energy;

   // Add the contribution of the bits above bit 15, then carry whatever
   // overflowed out of the low part and keep only its 16 lowest bits.
   parameters.energy_update_threshold += kThresholdIncrement *
       (parameters.energy_update_threshold>>16);
   parameters.energy_update_threshold +=
       parameters.low_energy_update_threshold >> 16;
   parameters.low_energy_update_threshold =
       parameters.low_energy_update_threshold & 0x0FFFF;

   // Update maximum energy.
   // Decrease by a factor 1/1024 each time.
   parameters.max_energy = parameters.max_energy -
       (parameters.max_energy >> 10);
   if (sample_energy > parameters.max_energy) {
     parameters.max_energy = sample_energy;
   }

   // Set |energy_update_threshold| to no less than 60 dB lower than
   // |max_energy|. Adding 524288 assures proper rounding.
   int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20;
   if (energy_update_threshold > parameters.energy_update_threshold) {
     parameters.energy_update_threshold = energy_update_threshold;
   }
 }

 // Stores a new background noise estimate for |channel|.
 //
 // Copies |kMaxLpcOrder| + 1 values from |lpc_coefficients| into the channel's
 // filter and |kMaxLpcOrder| values from |filter_state| into its filter state,
 // stores |sample_energy| (at least 1) as both the energy and the energy update
 // threshold, and derives |scale| / |scale_shift| from |residual_energy|.
 // Finally marks the object as initialized.
 // |channel| must be smaller than the number of channels.
 void BackgroundNoise::SaveParameters(size_t channel,
                                      const int16_t* lpc_coefficients,
                                      const int16_t* filter_state,
                                      int32_t sample_energy,
                                      int32_t residual_energy) {
   assert(channel < num_channels_);
   ChannelParameters& parameters = channel_parameters_[channel];
   memcpy(parameters.filter, lpc_coefficients,
          (kMaxLpcOrder + 1) * sizeof(int16_t));
   memcpy(parameters.filter_state, filter_state,
          kMaxLpcOrder * sizeof(int16_t));
   // Save energy level and update energy threshold levels.
   // Never get under 1.0 in average sample energy.
   parameters.energy = std::max(sample_energy, 1);
   parameters.energy_update_threshold = parameters.energy;
   parameters.low_energy_update_threshold = 0;

   // Normalize residual_energy to 29 or 30 bits before sqrt.
   int norm_shift = WebRtcSpl_NormW32(residual_energy) - 1;
   if (norm_shift & 0x1) {
     norm_shift -= 1;  // Even number of shifts required.
   }
   // |norm_shift| is negative when the normalization count is 0 (the value
   // already uses all available bits). Shifting by a negative count is
   // undefined, so shift right by the magnitude in that case.
   if (norm_shift >= 0) {
     residual_energy = residual_energy << norm_shift;
   } else {
     residual_energy = residual_energy >> (-norm_shift);
   }

   // Calculate scale and shift factor.
   parameters.scale = WebRtcSpl_SqrtFloor(residual_energy);
   // Add 13 to the |scale_shift|, since the random numbers table is in
   // Q13.
   // TODO(hlundin): Move the "13" to where the |scale_shift| is used?
   parameters.scale_shift = 13 + ((kLogResidualLength + norm_shift) / 2);

   initialized_ = true;
 }

 }  // namespace webrtc
	/*
	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include "webrtc/modules/audio_coding/neteq/background_noise.h"

	#include <assert.h>
	#include <string.h> // memcpy

	#include <algorithm> // min, max

	#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
	#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
	#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"

	namespace webrtc {

	// Creates background noise state for |num_channels| channels. The mode
	// starts out as kBgnOn and every channel is put into its reset state.
	BackgroundNoise::BackgroundNoise(size_t num_channels)
	    : num_channels_(num_channels),
	      channel_parameters_(new ChannelParameters[num_channels]),
	      mode_(kBgnOn) {
	  // Reset() clears |initialized_| and resets each channel's parameters.
	  Reset();
	}

	// No explicit cleanup is performed here.
	// NOTE(review): |channel_parameters_| is allocated with new[] in the
	// constructor; presumably it is an owning smart pointer declared in the
	// header, so nothing has to be released manually - confirm.
	BackgroundNoise::~BackgroundNoise() {
	}

	void BackgroundNoise::Reset() {
	initialized_ = false;
	for (size_t channel = 0; channel < num_channels_; ++channel) {
	channel_parameters_[channel].Reset();
	}
	// Keep _bgnMode as it is.
	}

	void BackgroundNoise::Update(const AudioMultiVector& input,
	const PostDecodeVad& vad) {
	if (vad.running() && vad.active_speech()) {
	// Do not update the background noise parameters if we know that the signal
	// is active speech.
	return;
	}

	int32_t auto_correlation[kMaxLpcOrder + 1];
	int16_t fiter_output[kMaxLpcOrder + kResidualLength];
	int16_t reflection_coefficients[kMaxLpcOrder];
	int16_t lpc_coefficients[kMaxLpcOrder + 1];

	for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
	ChannelParameters& parameters = channel_parameters_[channel_ix];
	int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
	int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
	memcpy(temp_signal,
	&input[channel_ix][input.Size() - kVecLen],
	sizeof(int16_t) * kVecLen);

	int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen,
	auto_correlation);

	if ((!vad.running() &&
	sample_energy < parameters.energy_update_threshold) \|\|
	(vad.running() && !vad.active_speech())) {
	// Generate LPC coefficients.
	if (auto_correlation[0] > 0) {
	// Regardless of whether the filter is actually updated or not,
	// update energy threshold levels, since we have in fact observed
	// a low energy signal.
	if (sample_energy < parameters.energy_update_threshold) {
	// Never go under 1.0 in average sample energy.
	parameters.energy_update_threshold = std::max(sample_energy, 1);
	parameters.low_energy_update_threshold = 0;
	}

	// Only update BGN if filter is stable, i.e., if return value from
	// Levinson-Durbin function is 1.
	if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients,
	reflection_coefficients,
	kMaxLpcOrder) != 1) {
	return;
	}
	} else {
	// Center value in auto-correlation is not positive. Do not update.
	return;
	}

	// Generate the CNG gain factor by looking at the energy of the residual.
	WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
	fiter_output, lpc_coefficients,
	kMaxLpcOrder + 1, kResidualLength);
	int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output,
	fiter_output,
	kResidualLength,
	0);

	// Check spectral flatness.
	// Comparing the residual variance with the input signal variance tells
	// if the spectrum is flat or not.
	// If 20 * residual_energy >= sample_energy << 6, the spectrum is flat
	// enough. Also ensure that the energy is non-zero.
	if ((residual_energy * 20 >= (sample_energy << 6)) &&
	(sample_energy > 0)) {
	// Spectrum is flat enough; save filter parameters.
	// \|temp_signal\| + \|kVecLen\| - \|kMaxLpcOrder\| points at the first of the
	// \|kMaxLpcOrder\| samples in the residual signal, which will form the
	// filter state for the next noise generation.
	SaveParameters(channel_ix, lpc_coefficients,
	temp_signal + kVecLen - kMaxLpcOrder, sample_energy,
	residual_energy);
	}
	} else {
	// Will only happen if post-decode VAD is disabled and \|sample_energy\| is
	// not low enough. Increase the threshold for update so that it increases
	// by a factor 4 in 4 seconds.
	IncrementEnergyThreshold(channel_ix, sample_energy);
	}
	}
	return;
	}

	int32_t BackgroundNoise::Energy(size_t channel) const {
	assert(channel < num_channels_);
	return channel_parameters_[channel].energy;
	}

	void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) {
	assert(channel < num_channels_);
	channel_parameters_[channel].mute_factor = value;
	}

	int16_t BackgroundNoise::MuteFactor(size_t channel) const {
	assert(channel < num_channels_);
	return channel_parameters_[channel].mute_factor;
	}

	const int16_t* BackgroundNoise::Filter(size_t channel) const {
	assert(channel < num_channels_);
	return channel_parameters_[channel].filter;
	}

	const int16_t* BackgroundNoise::FilterState(size_t channel) const {
	assert(channel < num_channels_);
	return channel_parameters_[channel].filter_state;
	}

	void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input,
	size_t length) {
	assert(channel < num_channels_);
	length = std::min(length, static_cast<size_t>(kMaxLpcOrder));
	memcpy(channel_parameters_[channel].filter_state, input,
	length * sizeof(int16_t));
	}

	int16_t BackgroundNoise::Scale(size_t channel) const {
	assert(channel < num_channels_);
	return channel_parameters_[channel].scale;
	}
	int16_t BackgroundNoise::ScaleShift(size_t channel) const {
	assert(channel < num_channels_);
	return channel_parameters_[channel].scale_shift;
	}

	// Writes |kMaxLpcOrder| + 1 correlation values of |signal| (|length| samples)
	// with itself into |auto_correlation|, and returns the first value shifted
	// down so that it represents energy per sample.
	// NOTE(review): a step of -1 is passed to the cross-correlation, which
	// presumably makes it read samples located before |signal|; the caller must
	// then provide valid history ahead of the pointer - confirm.
	int32_t BackgroundNoise::CalculateAutoCorrelation(
	    const int16_t* signal, int length, int32_t* auto_correlation) const {
	  // Derive the scaling passed to the correlation from the normalization
	  // headroom of the squared peak sample; it is never allowed to be negative.
	  const int16_t peak = WebRtcSpl_MaxAbsValueW16(signal, length);
	  const int unclamped_scale = kLogVecLen - WebRtcSpl_NormW32(peak * peak);
	  const int correlation_scale = std::max(0, unclamped_scale);

	  const int kCorrelationStep = -1;
	  WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal, length,
	                             kMaxLpcOrder + 1, correlation_scale,
	                             kCorrelationStep);

	  // Remaining number of shifts needed to normalize energy to energy/sample.
	  return auto_correlation[0] >> (kLogVecLen - correlation_scale);
	}

	// Raises the energy update threshold of |channel| and refreshes the tracked
	// maximum energy using |sample_energy|.
	//
	// The threshold is kept in two parts: |energy_update_threshold| and
	// |low_energy_update_threshold|, where the latter is masked to its lowest 16
	// bits after each update and its overflow above 16 bits is carried into the
	// former. |channel| must be smaller than the number of channels.
	void BackgroundNoise::IncrementEnergyThreshold(size_t channel,
	int32_t sample_energy) {
	// TODO(hlundin): Simplify the below threshold update. What this code
	// does is simply "threshold += (increment * threshold) >> 16", but due
	// to the limited-width operations, it is not exactly the same. The
	// difference should be inaudible, but bit-exactness would not be
	// maintained.
	assert(channel < num_channels_);
	ChannelParameters& parameters = channel_parameters_[channel];
	// Accumulate the increment contributed by the low part and by the two
	// lowest bytes of |energy_update_threshold|.
	int32_t temp_energy =
	WEBRTC_SPL_MUL_16_16_RSFT(kThresholdIncrement,
	parameters.low_energy_update_threshold, 16);
	temp_energy += kThresholdIncrement *
	(parameters.energy_update_threshold & 0xFF);
	temp_energy += (kThresholdIncrement *
	((parameters.energy_update_threshold>>8) & 0xFF)) << 8;
	parameters.low_energy_update_threshold += temp_energy;

	// Add the contribution of the bits above bit 15, then carry whatever
	// overflowed out of the low part and keep only its 16 lowest bits.
	parameters.energy_update_threshold += kThresholdIncrement *
	(parameters.energy_update_threshold>>16);
	parameters.energy_update_threshold +=
	parameters.low_energy_update_threshold >> 16;
	parameters.low_energy_update_threshold =
	parameters.low_energy_update_threshold & 0x0FFFF;

	// Update maximum energy.
	// Decrease by a factor 1/1024 each time.
	parameters.max_energy = parameters.max_energy -
	(parameters.max_energy >> 10);
	if (sample_energy > parameters.max_energy) {
	parameters.max_energy = sample_energy;
	}

	// Set |energy_update_threshold| to no less than 60 dB lower than
	// |max_energy|. Adding 524288 assures proper rounding.
	int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20;
	if (energy_update_threshold > parameters.energy_update_threshold) {
	parameters.energy_update_threshold = energy_update_threshold;
	}
	}

	// Stores a new background noise estimate for |channel|.
	//
	// Copies |kMaxLpcOrder| + 1 values from |lpc_coefficients| into the channel's
	// filter and |kMaxLpcOrder| values from |filter_state| into its filter state,
	// stores |sample_energy| (at least 1) as both the energy and the energy update
	// threshold, and derives |scale| / |scale_shift| from |residual_energy|.
	// Finally marks the object as initialized.
	// |channel| must be smaller than the number of channels.
	void BackgroundNoise::SaveParameters(size_t channel,
	                                     const int16_t* lpc_coefficients,
	                                     const int16_t* filter_state,
	                                     int32_t sample_energy,
	                                     int32_t residual_energy) {
	  assert(channel < num_channels_);
	  ChannelParameters& parameters = channel_parameters_[channel];
	  memcpy(parameters.filter, lpc_coefficients,
	         (kMaxLpcOrder + 1) * sizeof(int16_t));
	  memcpy(parameters.filter_state, filter_state,
	         kMaxLpcOrder * sizeof(int16_t));
	  // Save energy level and update energy threshold levels.
	  // Never get under 1.0 in average sample energy.
	  parameters.energy = std::max(sample_energy, 1);
	  parameters.energy_update_threshold = parameters.energy;
	  parameters.low_energy_update_threshold = 0;

	  // Normalize residual_energy to 29 or 30 bits before sqrt.
	  int norm_shift = WebRtcSpl_NormW32(residual_energy) - 1;
	  if (norm_shift & 0x1) {
	    norm_shift -= 1;  // Even number of shifts required.
	  }
	  // |norm_shift| is negative when the normalization count is 0 (the value
	  // already uses all available bits). Shifting by a negative count is
	  // undefined, so shift right by the magnitude in that case.
	  if (norm_shift >= 0) {
	    residual_energy = residual_energy << norm_shift;
	  } else {
	    residual_energy = residual_energy >> (-norm_shift);
	  }

	  // Calculate scale and shift factor.
	  parameters.scale = WebRtcSpl_SqrtFloor(residual_energy);
	  // Add 13 to the |scale_shift|, since the random numbers table is in
	  // Q13.
	  // TODO(hlundin): Move the "13" to where the |scale_shift| is used?
	  parameters.scale_shift = 13 + ((kLogResidualLength + norm_shift) / 2);

	  initialized_ = true;
	}

	} // namespace webrtc