modules/audio_processing/aec3/comfort_noise_generator.cc - src - Git at Google

 /*
  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "modules/audio_processing/aec3/comfort_noise_generator.h"

 // Defines WEBRTC_ARCH_X86_FAMILY, used below.
 #include "rtc_base/system/arch.h"

 #if defined(WEBRTC_ARCH_X86_FAMILY)
 #include <emmintrin.h>
 #endif
 #include <algorithm>
 #include <array>
 #include <cmath>
 #include <cstdint>
 #include <functional>
 #include <numeric>

 #include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "modules/audio_processing/aec3/vector_math.h"
 #include "rtc_base/checks.h"

 namespace webrtc {

 namespace {

 // Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
 float GetNoiseFloorFactor(float noise_floor_dbfs) {
   // kdBfsNormalization = 20.f*log10(32768.f).
   constexpr float kdBfsNormalization = 90.30899869919436f;
   return 64.f * powf(10.f, (kdBfsNormalization + noise_floor_dbfs) * 0.1f);
 }

 // Table of sqrt(2) * sin(2*pi*i/32).
 constexpr float kSqrt2Sin[32] = {
     +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
     +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
     +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
     +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
     -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
     -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
     -0.5411961f, -0.2758994f};

 void GenerateComfortNoise(Aec3Optimization optimization,
                           const std::array<float, kFftLengthBy2Plus1>& N2,
                           uint32_t* seed,
                           FftData* lower_band_noise,
                           FftData* upper_band_noise) {
   FftData* N_low = lower_band_noise;
   FftData* N_high = upper_band_noise;

   // Compute square root spectrum.
   std::array<float, kFftLengthBy2Plus1> N;
   std::copy(N2.begin(), N2.end(), N.begin());
   aec3::VectorMath(optimization).Sqrt(N);

   // Compute the noise level for the upper bands.
   constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
   constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
   const float high_band_noise_level =
       std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
       kOneByNumBands;

   // The analysis and synthesis windowing cause loss of power when
   // cross-fading the noise where frames are completely uncorrelated
   // (generated with random phase), hence the factor sqrt(2).
   // This is not the case for the speech signal where the input is overlapping
   // (strong correlation).
   N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
       N_high->re[kFftLengthBy2] = 0.f;
   for (size_t k = 1; k < kFftLengthBy2; k++) {
     constexpr int kIndexMask = 32 - 1;
     // Generate a random 31-bit integer.
     seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
     // Convert to a 5-bit index.
     int i = seed[0] >> 26;

     // y = sqrt(2) * sin(a)
     const float x = kSqrt2Sin[i];
     // x = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
     const float y = kSqrt2Sin[(i + 8) & kIndexMask];

     // Form low-frequency noise via spectral shaping.
     N_low->re[k] = N[k] * x;
     N_low->im[k] = N[k] * y;

     // Form the high-frequency noise via simple levelling.
     N_high->re[k] = high_band_noise_level * x;
     N_high->im[k] = high_band_noise_level * y;
   }
 }

 }  // namespace

 ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
                                              Aec3Optimization optimization,
                                              size_t num_capture_channels)
     : optimization_(optimization),
       seed_(42),
       num_capture_channels_(num_capture_channels),
       noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
       N2_initial_(
           std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
               num_capture_channels_)),
       Y2_smoothed_(num_capture_channels_),
       N2_(num_capture_channels_) {
   for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
     (*N2_initial_)[ch].fill(0.f);
     Y2_smoothed_[ch].fill(0.f);
     N2_[ch].fill(1.0e6f);
   }
 }

 ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;

 void ComfortNoiseGenerator::Compute(
     bool saturated_capture,
     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
         capture_spectrum,
     rtc::ArrayView<FftData> lower_band_noise,
     rtc::ArrayView<FftData> upper_band_noise) {
   const auto& Y2 = capture_spectrum;

   if (!saturated_capture) {
     // Smooth Y2.
     for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
       std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
                      Y2[ch].begin(), Y2_smoothed_[ch].begin(),
                      [](float a, float b) { return a + 0.1f * (b - a); });
     }

     if (N2_counter_ > 50) {
       // Update N2 from Y2_smoothed.
       for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
         std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
                        N2_[ch].begin(), [](float a, float b) {
                          return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
                                       : a * 1.0002f;
                        });
       }
     }

     if (N2_initial_) {
       if (++N2_counter_ == 1000) {
         N2_initial_.reset();
       } else {
         // Compute the N2_initial from N2.
         for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
           std::transform(N2_[ch].begin(), N2_[ch].end(),
                          (*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
                          [](float a, float b) {
                            return a > b ? b + 0.001f * (a - b) : a;
                          });
         }
       }
     }

     for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
       for (auto& n : N2_[ch]) {
         n = std::max(n, noise_floor_);
       }
       if (N2_initial_) {
         for (auto& n : (*N2_initial_)[ch]) {
           n = std::max(n, noise_floor_);
         }
       }
     }
   }

   // Choose N2 estimate to use.
   const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;

   for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
     GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
                          &upper_band_noise[ch]);
   }
 }

 }  // namespace webrtc
	/*
	* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include "modules/audio_processing/aec3/comfort_noise_generator.h"

	// Defines WEBRTC_ARCH_X86_FAMILY, used below.
	#include "rtc_base/system/arch.h"

	#if defined(WEBRTC_ARCH_X86_FAMILY)
	#include <emmintrin.h>
	#endif
	#include <algorithm>
	#include <array>
	#include <cmath>
	#include <cstdint>
	#include <functional>
	#include <numeric>

	#include "common_audio/signal_processing/include/signal_processing_library.h"
	#include "modules/audio_processing/aec3/vector_math.h"
	#include "rtc_base/checks.h"

	namespace webrtc {

	namespace {

	// Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
	float GetNoiseFloorFactor(float noise_floor_dbfs) {
	// kdBfsNormalization = 20.f*log10(32768.f).
	constexpr float kdBfsNormalization = 90.30899869919436f;
	return 64.f * powf(10.f, (kdBfsNormalization + noise_floor_dbfs) * 0.1f);
	}

	// Table of sqrt(2) * sin(2pii/32).
	constexpr float kSqrt2Sin[32] = {
	+0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
	+1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
	+1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
	+0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
	-1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
	-1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
	-0.5411961f, -0.2758994f};

	void GenerateComfortNoise(Aec3Optimization optimization,
	const std::array<float, kFftLengthBy2Plus1>& N2,
	uint32_t* seed,
	FftData* lower_band_noise,
	FftData* upper_band_noise) {
	FftData* N_low = lower_band_noise;
	FftData* N_high = upper_band_noise;

	// Compute square root spectrum.
	std::array<float, kFftLengthBy2Plus1> N;
	std::copy(N2.begin(), N2.end(), N.begin());
	aec3::VectorMath(optimization).Sqrt(N);

	// Compute the noise level for the upper bands.
	constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
	constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
	const float high_band_noise_level =
	std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
	kOneByNumBands;

	// The analysis and synthesis windowing cause loss of power when
	// cross-fading the noise where frames are completely uncorrelated
	// (generated with random phase), hence the factor sqrt(2).
	// This is not the case for the speech signal where the input is overlapping
	// (strong correlation).
	N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
	N_high->re[kFftLengthBy2] = 0.f;
	for (size_t k = 1; k < kFftLengthBy2; k++) {
	constexpr int kIndexMask = 32 - 1;
	// Generate a random 31-bit integer.
	seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
	// Convert to a 5-bit index.
	int i = seed[0] >> 26;

	// y = sqrt(2) * sin(a)
	const float x = kSqrt2Sin[i];
	// x = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
	const float y = kSqrt2Sin[(i + 8) & kIndexMask];

	// Form low-frequency noise via spectral shaping.
	N_low->re[k] = N[k] * x;
	N_low->im[k] = N[k] * y;

	// Form the high-frequency noise via simple levelling.
	N_high->re[k] = high_band_noise_level * x;
	N_high->im[k] = high_band_noise_level * y;
	}
	}

	} // namespace

	ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
	Aec3Optimization optimization,
	size_t num_capture_channels)
	: optimization_(optimization),
	seed_(42),
	num_capture_channels_(num_capture_channels),
	noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
	N2_initial_(
	std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
	num_capture_channels_)),
	Y2_smoothed_(num_capture_channels_),
	N2_(num_capture_channels_) {
	for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
	(*N2_initial_)[ch].fill(0.f);
	Y2_smoothed_[ch].fill(0.f);
	N2_[ch].fill(1.0e6f);
	}
	}

	ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;

	void ComfortNoiseGenerator::Compute(
	bool saturated_capture,
	rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
	capture_spectrum,
	rtc::ArrayView<FftData> lower_band_noise,
	rtc::ArrayView<FftData> upper_band_noise) {
	const auto& Y2 = capture_spectrum;

	if (!saturated_capture) {
	// Smooth Y2.
	for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
	std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
	Y2[ch].begin(), Y2_smoothed_[ch].begin(),
	[](float a, float b) { return a + 0.1f * (b - a); });
	}

	if (N2_counter_ > 50) {
	// Update N2 from Y2_smoothed.
	for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
	std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
	N2_[ch].begin(), [](float a, float b) {
	return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
	: a * 1.0002f;
	});
	}
	}

	if (N2_initial_) {
	if (++N2_counter_ == 1000) {
	N2_initial_.reset();
	} else {
	// Compute the N2_initial from N2.
	for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
	std::transform(N2_[ch].begin(), N2_[ch].end(),
	(N2_initial_)[ch].begin(), (N2_initial_)[ch].begin(),
	[](float a, float b) {
	return a > b ? b + 0.001f * (a - b) : a;
	});
	}
	}
	}

	for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
	for (auto& n : N2_[ch]) {
	n = std::max(n, noise_floor_);
	}
	if (N2_initial_) {
	for (auto& n : (*N2_initial_)[ch]) {
	n = std::max(n, noise_floor_);
	}
	}
	}
	}

	// Choose N2 estimate to use.
	const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;

	for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
	GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
	&upper_band_noise[ch]);
	}
	}

	} // namespace webrtc