/*
 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"

#include <algorithm>
#include <numeric>
#include <vector>

#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/level_controller/down_sampler.h"
#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
#include "webrtc/rtc_base/array_view.h"
#include "webrtc/rtc_base/constructormagic.h"

namespace webrtc {
namespace {

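// Removes the DC component from the signal by subtracting the mean computed
// over the frame.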
void RemoveDcLevel(rtc::ArrayView<float> x) {
  RTC_DCHECK_LT(0, x.size());
  float mean = std::accumulate(x.data(), x.data() + x.size(), 0.f);
  mean /= x.size();

  for (float& v : x) {
    v -= mean;
  }
}

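// Computes the 65-bin power spectrum of a 128-sample frame using the
// 128-point real FFT (OouraFft).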
void PowerSpectrum(const OouraFft* ooura_fft,
                   rtc::ArrayView<const float> x,
                   rtc::ArrayView<float> spectrum) {
  RTC_DCHECK_EQ(65, spectrum.size());
  RTC_DCHECK_EQ(128, x.size());
  float X[128];
  std::copy(x.data(), x.data() + x.size(), X);
  ooura_fft->Fft(X);

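  // The FFT output is stored in-place in X: X[0] and X[1] hold the purely
  // real DC and Nyquist components, and bins 1..63 follow as interleaved
  // real/imaginary pairs. Unpack that layout into per-bin powers.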
  float* X_p = X;
  RTC_DCHECK_EQ(X_p, &X[0]);
  spectrum[0] = (*X_p) * (*X_p);
  ++X_p;
  RTC_DCHECK_EQ(X_p, &X[1]);
  spectrum[64] = (*X_p) * (*X_p);
  for (int k = 1; k < 64; ++k) {
    ++X_p;
    RTC_DCHECK_EQ(X_p, &X[2 * k]);
    spectrum[k] = (*X_p) * (*X_p);
    ++X_p;
    RTC_DCHECK_EQ(X_p, &X[2 * k + 1]);
    spectrum[k] += (*X_p) * (*X_p);
  }
}

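// Classifies the signal by comparing the signal spectrum to the noise
// spectrum estimate in bands 1-39. A band counts as stationary when its power
// is within a factor of 3 of the noise estimate, and as highly nonstationary
// when it exceeds 9 times the noise estimate.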
SignalClassifier::SignalType ClassifySignal(
    rtc::ArrayView<const float> signal_spectrum,
    rtc::ArrayView<const float> noise_spectrum,
    ApmDataDumper* data_dumper) {
  int num_stationary_bands = 0;
  int num_highly_nonstationary_bands = 0;

  // Detect stationary and highly nonstationary bands.
  for (size_t k = 1; k < 40; k++) {
    if (signal_spectrum[k] < 3 * noise_spectrum[k] &&
        signal_spectrum[k] * 3 > noise_spectrum[k]) {
      ++num_stationary_bands;
    } else if (signal_spectrum[k] > 9 * noise_spectrum[k]) {
      ++num_highly_nonstationary_bands;
    }
  }

  data_dumper->DumpRaw("lc_num_stationary_bands", 1, &num_stationary_bands);
  data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1,
                       &num_highly_nonstationary_bands);

  // Use the detected number of bands to classify the overall signal
  // stationarity.
  if (num_stationary_bands > 15) {
    return SignalClassifier::SignalType::kStationary;
  } else if (num_highly_nonstationary_bands > 15) {
    return SignalClassifier::SignalType::kHighlyNonStationary;
  } else {
    return SignalClassifier::SignalType::kNonStationary;
  }
}

}  // namespace

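// The FrameExtender keeps the tail of the previous frame so that each new
// frame can be extended to the FFT length with overlapping history.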
SignalClassifier::FrameExtender::FrameExtender(size_t frame_size,
                                               size_t extended_frame_size)
    : x_old_(extended_frame_size - frame_size, 0.f) {}

SignalClassifier::FrameExtender::~FrameExtender() = default;

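// Forms the extended frame by prepending the stored tail of the previous
// frame to the new frame, and then stores the tail of the extended frame for
// the next call.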
void SignalClassifier::FrameExtender::ExtendFrame(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float> x_extended) {
  RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size());
  std::copy(x_old_.data(), x_old_.data() + x_old_.size(), x_extended.data());
  std::copy(x.data(), x.data() + x.size(), x_extended.data() + x_old_.size());
  std::copy(x_extended.data() + x_extended.size() - x_old_.size(),
            x_extended.data() + x_extended.size(), x_old_.data());
}

SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper),
      down_sampler_(data_dumper_),
      noise_spectrum_estimator_(data_dumper_) {
  Initialize(AudioProcessing::kSampleRate48kHz);
}

SignalClassifier::~SignalClassifier() = default;

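// Resets the classifier for the given sample rate: reinitializes the
// downsampler and the noise spectrum estimator, recreates the frame extender
// for 80-sample frames extended to 128 samples, and resets the counters that
// gate the classification output.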
void SignalClassifier::Initialize(int sample_rate_hz) {
  down_sampler_.Initialize(sample_rate_hz);
  noise_spectrum_estimator_.Initialize();
  frame_extender_.reset(new FrameExtender(80, 128));
  sample_rate_hz_ = sample_rate_hz;
  initialization_frames_left_ = 2;
  consistent_classification_counter_ = 3;
  last_signal_type_ = SignalClassifier::SignalType::kNonStationary;
}

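// Analyzes one 10 ms frame: the first channel is downsampled to 80 samples
// (8 kHz), extended to the FFT length, DC-corrected and transformed to a
// power spectrum, which is then classified against the running noise
// spectrum estimate.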
void SignalClassifier::Analyze(const AudioBuffer& audio,
                               SignalType* signal_type) {
  RTC_DCHECK_EQ(audio.num_frames(), sample_rate_hz_ / 100);

  // Compute the signal power spectrum.
  float downsampled_frame[80];
  down_sampler_.DownSample(rtc::ArrayView<const float>(
                               audio.channels_const_f()[0], audio.num_frames()),
                           downsampled_frame);
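  // Extend the 80-sample frame with the tail of the previous frame to match
  // the 128-point FFT size.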
  float extended_frame[128];
  frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
  RemoveDcLevel(extended_frame);
  float signal_spectrum[65];
  PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum);

  // Classify the signal based on the estimate of the noise spectrum and the
  // signal spectrum estimate.
  *signal_type = ClassifySignal(signal_spectrum,
                                noise_spectrum_estimator_.GetNoiseSpectrum(),
                                data_dumper_);

  // Update the noise spectrum based on the signal spectrum.
  noise_spectrum_estimator_.Update(signal_spectrum,
                                   initialization_frames_left_ > 0);

  // Update the number of frames until a reliable signal spectrum is achieved.
  initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);

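  // Only report a new classification once it has been consistent for several
  // consecutive frames; until then, fall back to kNonStationary.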
  if (last_signal_type_ == *signal_type) {
    consistent_classification_counter_ =
        std::max(0, consistent_classification_counter_ - 1);
  } else {
    last_signal_type_ = *signal_type;
    consistent_classification_counter_ = 3;
  }

  if (consistent_classification_counter_ > 0) {
    *signal_type = SignalClassifier::SignalType::kNonStationary;
  }
}

}  // namespace webrtc