// modules/audio_processing/voice_detection_impl.cc - src/webrtc - Git at Google

 /*
  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
  *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */

 #include "webrtc/modules/audio_processing/voice_detection_impl.h"

 #include "webrtc/common_audio/vad/include/webrtc_vad.h"
 #include "webrtc/modules/audio_processing/audio_buffer.h"
 #include "webrtc/rtc_base/constructormagic.h"

 namespace webrtc {
 // Thin owner of a VadInst handle from the WebRtcVad_* C API: the handle is
 // created and initialized on construction and released on destruction.
 class VoiceDetectionImpl::Vad {
  public:
   // Creates the handle, checks that it is non-null and initializes it.
   Vad() : state_(WebRtcVad_Create()) {
     RTC_CHECK(state_);
     int error = WebRtcVad_Init(state_);
     RTC_DCHECK_EQ(0, error);
   }

   // Hands the handle back to the VAD library.
   ~Vad() { WebRtcVad_Free(state_); }

   // Borrowed access to the handle; this object keeps ownership.
   VadInst* state() { return state_; }

  private:
   VadInst* state_ = nullptr;
   RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
 };

 VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
     : crit_(crit) {
   RTC_DCHECK(crit);
 }

 // Nothing to release by hand; members clean up after themselves.
 VoiceDetectionImpl::~VoiceDetectionImpl() = default;

 void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
   rtc::CritScope cs(crit_);
   sample_rate_hz_ = sample_rate_hz;
   std::unique_ptr<Vad> new_vad;
   if (enabled_) {
     new_vad.reset(new Vad());
   }
   vad_.swap(new_vad);
   using_external_vad_ = false;
   frame_size_samples_ =
       static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
   set_likelihood(likelihood_);
 }

 // Runs voice detection on |audio| and records the outcome both in
 // |stream_has_voice_| and on the buffer's activity field. Does nothing while
 // disabled.
 void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
   rtc::CritScope lock(crit_);
   if (!enabled_) {
     return;
   }
   if (using_external_vad_) {
     // A decision was supplied via set_stream_has_voice(); skip detection for
     // this call and clear the flag so the next call detects again.
     using_external_vad_ = false;
     return;
   }

   RTC_DCHECK_GE(160, audio->num_frames_per_band());
   // TODO(ajm): concatenate data in frame buffer here.
   const int decision = WebRtcVad_Process(
       vad_->state(), sample_rate_hz_, audio->mixed_low_pass_data(),
       frame_size_samples_);
   switch (decision) {
     case 0:
       stream_has_voice_ = false;
       audio->set_activity(AudioFrame::kVadPassive);
       break;
     case 1:
       stream_has_voice_ = true;
       audio->set_activity(AudioFrame::kVadActive);
       break;
     default:
       // Any other return value is unexpected here.
       RTC_NOTREACHED();
       break;
   }
 }

 // Switches the component on or off. A change of state triggers a
 // re-initialization at the current sample rate; a no-op request does not.
 // Always returns AudioProcessing::kNoError.
 int VoiceDetectionImpl::Enable(bool enable) {
   rtc::CritScope lock(crit_);
   if (enabled_ == enable) {
     return AudioProcessing::kNoError;
   }
   enabled_ = enable;
   Initialize(sample_rate_hz_);
   return AudioProcessing::kNoError;
 }

 // Reports the enabled flag, read under the lock.
 bool VoiceDetectionImpl::is_enabled() const {
   rtc::CritScope lock(crit_);
   return enabled_;
 }

 // Records an externally supplied voice decision and flags it so that the
 // next ProcessCaptureAudio() call leaves it untouched.
 // Always returns AudioProcessing::kNoError.
 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
   rtc::CritScope lock(crit_);
   stream_has_voice_ = has_voice;
   using_external_vad_ = true;
   return AudioProcessing::kNoError;
 }

 // Returns the most recently stored voice decision, read under the lock.
 bool VoiceDetectionImpl::stream_has_voice() const {
   rtc::CritScope lock(crit_);
   // TODO(ajm): enable this assertion?
   // RTC_DCHECK(using_external_vad_ || is_component_enabled());
   return stream_has_voice_;
 }

 // Stores |likelihood| and, while enabled, forwards the matching mode to the
 // VAD instance. Mapping: kVeryLowLikelihood -> 3, kLowLikelihood -> 2,
 // kModerateLikelihood -> 1, kHighLikelihood -> 0.
 // Always returns AudioProcessing::kNoError.
 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
   rtc::CritScope lock(crit_);
   likelihood_ = likelihood;
   if (!enabled_) {
     // The stored value is applied on the next Initialize().
     return AudioProcessing::kNoError;
   }

   int vad_mode = 2;  // Stays at 2 if the switch hits its default branch.
   switch (likelihood) {
     case VoiceDetection::kVeryLowLikelihood:
       vad_mode = 3;
       break;
     case VoiceDetection::kLowLikelihood:
       vad_mode = 2;
       break;
     case VoiceDetection::kModerateLikelihood:
       vad_mode = 1;
       break;
     case VoiceDetection::kHighLikelihood:
       vad_mode = 0;
       break;
     default:
       RTC_NOTREACHED();
       break;
   }
   int error = WebRtcVad_set_mode(vad_->state(), vad_mode);
   RTC_DCHECK_EQ(0, error);
   return AudioProcessing::kNoError;
 }

 // Returns the stored likelihood setting, read under the lock.
 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
   rtc::CritScope lock(crit_);
   return likelihood_;
 }

 // Stores the frame size in milliseconds and re-initializes at the current
 // sample rate so the per-frame sample count is recomputed.
 // Always returns AudioProcessing::kNoError.
 int VoiceDetectionImpl::set_frame_size_ms(int size) {
   rtc::CritScope lock(crit_);
   // Only 10 ms is expected for now.
   RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
   frame_size_ms_ = size;
   Initialize(sample_rate_hz_);
   return AudioProcessing::kNoError;
 }

 // Returns the stored frame size in milliseconds, read under the lock.
 int VoiceDetectionImpl::frame_size_ms() const {
   rtc::CritScope lock(crit_);
   return frame_size_ms_;
 }
 }  // namespace webrtc
	/*
	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
	*
	* Use of this source code is governed by a BSD-style license
	* that can be found in the LICENSE file in the root of the source
	* tree. An additional intellectual property rights grant can be found
	* in the file PATENTS. All contributing project authors may
	* be found in the AUTHORS file in the root of the source tree.
	*/

	#include "webrtc/modules/audio_processing/voice_detection_impl.h"

	#include "webrtc/common_audio/vad/include/webrtc_vad.h"
	#include "webrtc/modules/audio_processing/audio_buffer.h"
	#include "webrtc/rtc_base/constructormagic.h"

	namespace webrtc {
	// Thin owner of a VadInst handle from the WebRtcVad_* C API: the handle is
	// created and initialized on construction and released on destruction.
	class VoiceDetectionImpl::Vad {
	 public:
	  // Creates the handle, checks that it is non-null and initializes it.
	  Vad() : state_(WebRtcVad_Create()) {
	    RTC_CHECK(state_);
	    int error = WebRtcVad_Init(state_);
	    RTC_DCHECK_EQ(0, error);
	  }

	  // Hands the handle back to the VAD library.
	  ~Vad() { WebRtcVad_Free(state_); }

	  // Borrowed access to the handle; this object keeps ownership.
	  VadInst* state() { return state_; }

	 private:
	  VadInst* state_ = nullptr;
	  RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
	};

	VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
	: crit_(crit) {
	RTC_DCHECK(crit);
	}

	// Nothing to release by hand; members clean up after themselves.
	VoiceDetectionImpl::~VoiceDetectionImpl() = default;

	void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
	rtc::CritScope cs(crit_);
	sample_rate_hz_ = sample_rate_hz;
	std::unique_ptr<Vad> new_vad;
	if (enabled_) {
	new_vad.reset(new Vad());
	}
	vad_.swap(new_vad);
	using_external_vad_ = false;
	frame_size_samples_ =
	static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
	set_likelihood(likelihood_);
	}

	// Runs voice detection on |audio| and records the outcome both in
	// |stream_has_voice_| and on the buffer's activity field. Does nothing while
	// disabled.
	void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
	  rtc::CritScope lock(crit_);
	  if (!enabled_) {
	    return;
	  }
	  if (using_external_vad_) {
	    // A decision was supplied via set_stream_has_voice(); skip detection for
	    // this call and clear the flag so the next call detects again.
	    using_external_vad_ = false;
	    return;
	  }

	  RTC_DCHECK_GE(160, audio->num_frames_per_band());
	  // TODO(ajm): concatenate data in frame buffer here.
	  const int decision = WebRtcVad_Process(
	      vad_->state(), sample_rate_hz_, audio->mixed_low_pass_data(),
	      frame_size_samples_);
	  switch (decision) {
	    case 0:
	      stream_has_voice_ = false;
	      audio->set_activity(AudioFrame::kVadPassive);
	      break;
	    case 1:
	      stream_has_voice_ = true;
	      audio->set_activity(AudioFrame::kVadActive);
	      break;
	    default:
	      // Any other return value is unexpected here.
	      RTC_NOTREACHED();
	      break;
	  }
	}

	// Switches the component on or off. A change of state triggers a
	// re-initialization at the current sample rate; a no-op request does not.
	// Always returns AudioProcessing::kNoError.
	int VoiceDetectionImpl::Enable(bool enable) {
	  rtc::CritScope lock(crit_);
	  if (enabled_ == enable) {
	    return AudioProcessing::kNoError;
	  }
	  enabled_ = enable;
	  Initialize(sample_rate_hz_);
	  return AudioProcessing::kNoError;
	}

	// Reports the enabled flag, read under the lock.
	bool VoiceDetectionImpl::is_enabled() const {
	  rtc::CritScope lock(crit_);
	  return enabled_;
	}

	// Records an externally supplied voice decision and flags it so that the
	// next ProcessCaptureAudio() call leaves it untouched.
	// Always returns AudioProcessing::kNoError.
	int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
	  rtc::CritScope lock(crit_);
	  stream_has_voice_ = has_voice;
	  using_external_vad_ = true;
	  return AudioProcessing::kNoError;
	}

	// Returns the most recently stored voice decision, read under the lock.
	bool VoiceDetectionImpl::stream_has_voice() const {
	  rtc::CritScope lock(crit_);
	  // TODO(ajm): enable this assertion?
	  // RTC_DCHECK(using_external_vad_ || is_component_enabled());
	  return stream_has_voice_;
	}

	// Stores |likelihood| and, while enabled, forwards the matching mode to the
	// VAD instance. Mapping: kVeryLowLikelihood -> 3, kLowLikelihood -> 2,
	// kModerateLikelihood -> 1, kHighLikelihood -> 0.
	// Always returns AudioProcessing::kNoError.
	int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
	  rtc::CritScope lock(crit_);
	  likelihood_ = likelihood;
	  if (!enabled_) {
	    // The stored value is applied on the next Initialize().
	    return AudioProcessing::kNoError;
	  }

	  int vad_mode = 2;  // Stays at 2 if the switch hits its default branch.
	  switch (likelihood) {
	    case VoiceDetection::kVeryLowLikelihood:
	      vad_mode = 3;
	      break;
	    case VoiceDetection::kLowLikelihood:
	      vad_mode = 2;
	      break;
	    case VoiceDetection::kModerateLikelihood:
	      vad_mode = 1;
	      break;
	    case VoiceDetection::kHighLikelihood:
	      vad_mode = 0;
	      break;
	    default:
	      RTC_NOTREACHED();
	      break;
	  }
	  int error = WebRtcVad_set_mode(vad_->state(), vad_mode);
	  RTC_DCHECK_EQ(0, error);
	  return AudioProcessing::kNoError;
	}

	// Returns the stored likelihood setting, read under the lock.
	VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
	  rtc::CritScope lock(crit_);
	  return likelihood_;
	}

	// Stores the frame size in milliseconds and re-initializes at the current
	// sample rate so the per-frame sample count is recomputed.
	// Always returns AudioProcessing::kNoError.
	int VoiceDetectionImpl::set_frame_size_ms(int size) {
	  rtc::CritScope lock(crit_);
	  // Only 10 ms is expected for now.
	  RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
	  frame_size_ms_ = size;
	  Initialize(sample_rate_hz_);
	  return AudioProcessing::kNoError;
	}

	// Returns the stored frame size in milliseconds, read under the lock.
	int VoiceDetectionImpl::frame_size_ms() const {
	  rtc::CritScope lock(crit_);
	  return frame_size_ms_;
	}
	} // namespace webrtc