Remove all AudioBuffer code that is not related to storing audio data
This CL moves/removes all code from the AudioBuffer that:
-Is not directly handling audio data (e.g., keytaps, VAD decisions).
-Is caching aggregated versions of the audio data.
-Is not used (or only used in testing)
Bug: webrtc:10882
Change-Id: I737deb3f692748eff30f46ad806b2c6f6292802c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/149072
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28866}
diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc
index 1a99463..584111c 100644
--- a/modules/audio_processing/audio_buffer.cc
+++ b/modules/audio_processing/audio_buffer.cc
@@ -27,15 +27,6 @@
const size_t kSamplesPer32kHzChannel = 320;
const size_t kSamplesPer48kHzChannel = 480;
-int KeyboardChannelIndex(const StreamConfig& stream_config) {
- if (!stream_config.has_keyboard()) {
- RTC_NOTREACHED();
- return 0;
- }
-
- return stream_config.num_channels();
-}
-
size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
size_t num_bands = 1;
if (num_frames == kSamplesPer32kHzChannel ||
@@ -60,10 +51,6 @@
num_channels_(num_process_channels),
num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
- mixed_low_pass_valid_(false),
- reference_copied_(false),
- activity_(AudioFrame::kVadUnknown),
- keyboard_data_(NULL),
data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)),
output_buffer_(new IFChannelBuffer(output_num_frames_, num_channels_)) {
RTC_DCHECK_GT(input_num_frames_, 0);
@@ -118,10 +105,6 @@
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
- if (stream_config.has_keyboard()) {
- keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
- }
-
// Downmix.
const float* const* data_ptr = data;
if (need_to_downmix) {
@@ -179,10 +162,6 @@
}
void AudioBuffer::InitForNewData() {
- keyboard_data_ = NULL;
- mixed_low_pass_valid_ = false;
- reference_copied_ = false;
- activity_ = AudioFrame::kVadUnknown;
num_channels_ = num_proc_channels_;
data_->set_num_channels(num_proc_channels_);
if (split_data_.get()) {
@@ -195,7 +174,6 @@
}
int16_t* const* AudioBuffer::channels() {
- mixed_low_pass_valid_ = false;
return data_->ibuf()->channels();
}
@@ -205,7 +183,6 @@
}
int16_t* const* AudioBuffer::split_bands(size_t channel) {
- mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->ibuf()->bands(channel)
: data_->ibuf()->bands(channel);
}
@@ -218,39 +195,11 @@
}
}
-int16_t* const* AudioBuffer::split_channels(Band band) {
- mixed_low_pass_valid_ = false;
- if (split_data_.get()) {
- return split_data_->ibuf()->channels(band);
- } else {
- return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
- }
-}
-
-ChannelBuffer<int16_t>* AudioBuffer::data() {
- mixed_low_pass_valid_ = false;
- return data_->ibuf();
-}
-
-const ChannelBuffer<int16_t>* AudioBuffer::data() const {
- return data_->ibuf_const();
-}
-
-ChannelBuffer<int16_t>* AudioBuffer::split_data() {
- mixed_low_pass_valid_ = false;
- return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
-}
-
-const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
- return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
-}
-
const float* const* AudioBuffer::channels_const_f() const {
return data_->fbuf_const()->channels();
}
float* const* AudioBuffer::channels_f() {
- mixed_low_pass_valid_ = false;
return data_->fbuf()->channels();
}
@@ -260,85 +209,10 @@
}
float* const* AudioBuffer::split_bands_f(size_t channel) {
- mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->fbuf()->bands(channel)
: data_->fbuf()->bands(channel);
}
-const float* const* AudioBuffer::split_channels_const_f(Band band) const {
- if (split_data_.get()) {
- return split_data_->fbuf_const()->channels(band);
- } else {
- return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
- }
-}
-
-float* const* AudioBuffer::split_channels_f(Band band) {
- mixed_low_pass_valid_ = false;
- if (split_data_.get()) {
- return split_data_->fbuf()->channels(band);
- } else {
- return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
- }
-}
-
-ChannelBuffer<float>* AudioBuffer::data_f() {
- mixed_low_pass_valid_ = false;
- return data_->fbuf();
-}
-
-const ChannelBuffer<float>* AudioBuffer::data_f() const {
- return data_->fbuf_const();
-}
-
-ChannelBuffer<float>* AudioBuffer::split_data_f() {
- mixed_low_pass_valid_ = false;
- return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
-}
-
-const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
- return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
-}
-
-const int16_t* AudioBuffer::mixed_low_pass_data() {
- if (num_proc_channels_ == 1) {
- return split_bands_const(0)[kBand0To8kHz];
- }
-
- if (!mixed_low_pass_valid_) {
- if (!mixed_low_pass_channels_.get()) {
- mixed_low_pass_channels_.reset(
- new ChannelBuffer<int16_t>(num_split_frames_, 1));
- }
-
- DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
- num_split_frames_, num_channels_,
- mixed_low_pass_channels_->channels()[0]);
- mixed_low_pass_valid_ = true;
- }
- return mixed_low_pass_channels_->channels()[0];
-}
-
-const int16_t* AudioBuffer::low_pass_reference(int channel) const {
- if (!reference_copied_) {
- return NULL;
- }
-
- return low_pass_reference_channels_->channels()[channel];
-}
-
-const float* AudioBuffer::keyboard_data() const {
- return keyboard_data_;
-}
-
-void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
- activity_ = activity;
-}
-
-AudioFrame::VADActivity AudioBuffer::activity() const {
- return activity_;
-}
-
size_t AudioBuffer::num_channels() const {
return num_channels_;
}
@@ -359,17 +233,12 @@
return num_split_frames_;
}
-size_t AudioBuffer::num_keyboard_frames() const {
- // We don't resample the keyboard channel.
- return input_num_frames_;
-}
-
size_t AudioBuffer::num_bands() const {
return num_bands_;
}
// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
-void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
+void AudioBuffer::DeinterleaveFrom(const AudioFrame* frame) {
RTC_DCHECK_EQ(frame->num_channels_, num_input_channels_);
RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_);
InitForNewData();
@@ -378,7 +247,6 @@
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
- activity_ = frame->vad_activity_;
int16_t* const* deinterleaved;
if (input_num_frames_ == proc_num_frames_) {
@@ -407,12 +275,7 @@
}
}
-void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
- frame->vad_activity_ = activity_;
- if (!data_changed) {
- return;
- }
-
+void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1);
RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_);
@@ -437,21 +300,6 @@
}
}
-void AudioBuffer::CopyLowPassToReference() {
- reference_copied_ = true;
- if (!low_pass_reference_channels_.get() ||
- low_pass_reference_channels_->num_channels() != num_channels_) {
- low_pass_reference_channels_.reset(
- new ChannelBuffer<int16_t>(num_split_frames_, num_proc_channels_));
- }
- for (size_t i = 0; i < num_proc_channels_; i++) {
- memcpy(low_pass_reference_channels_->channels()[i],
- split_bands_const(i)[kBand0To8kHz],
- low_pass_reference_channels_->num_frames_per_band() *
- sizeof(split_bands_const(i)[kBand0To8kHz][0]));
- }
-}
-
void AudioBuffer::SplitIntoFrequencyBands() {
splitting_filter_->Analysis(data_.get(), split_data_.get());
}
diff --git a/modules/audio_processing/audio_buffer.h b/modules/audio_processing/audio_buffer.h
index 8fba9f9..c1bfb63 100644
--- a/modules/audio_processing/audio_buffer.h
+++ b/modules/audio_processing/audio_buffer.h
@@ -40,10 +40,10 @@
virtual ~AudioBuffer();
size_t num_channels() const;
+ size_t num_proc_channels() const { return num_proc_channels_; }
void set_num_channels(size_t num_channels);
size_t num_frames() const;
size_t num_frames_per_band() const;
- size_t num_keyboard_frames() const;
size_t num_bands() const;
// Returns a pointer array to the full-band channels.
@@ -76,44 +76,17 @@
// 0 <= band < |num_bands_|
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |num_split_frames_|
- int16_t* const* split_channels(Band band);
const int16_t* const* split_channels_const(Band band) const;
- float* const* split_channels_f(Band band);
- const float* const* split_channels_const_f(Band band) const;
-
- // Returns a pointer to the ChannelBuffer that encapsulates the full-band
- // data.
- ChannelBuffer<int16_t>* data();
- const ChannelBuffer<int16_t>* data() const;
- ChannelBuffer<float>* data_f();
- const ChannelBuffer<float>* data_f() const;
-
- // Returns a pointer to the ChannelBuffer that encapsulates the split data.
- ChannelBuffer<int16_t>* split_data();
- const ChannelBuffer<int16_t>* split_data() const;
- ChannelBuffer<float>* split_data_f();
- const ChannelBuffer<float>* split_data_f() const;
-
- // Returns a pointer to the low-pass data downmixed to mono. If this data
- // isn't already available it re-calculates it.
- const int16_t* mixed_low_pass_data();
- const int16_t* low_pass_reference(int channel) const;
-
- const float* keyboard_data() const;
-
- void set_activity(AudioFrame::VADActivity activity);
- AudioFrame::VADActivity activity() const;
// Use for int16 interleaved data.
- void DeinterleaveFrom(AudioFrame* audioFrame);
+ void DeinterleaveFrom(const AudioFrame* audioFrame);
// If |data_changed| is false, only the non-audio data members will be copied
// to |frame|.
- void InterleaveTo(AudioFrame* frame, bool data_changed) const;
+ void InterleaveTo(AudioFrame* frame) const;
// Use for float deinterleaved data.
void CopyFrom(const float* const* data, const StreamConfig& stream_config);
void CopyTo(const StreamConfig& stream_config, float* const* data);
- void CopyLowPassToReference();
// Splits the signal into different bands.
void SplitIntoFrequencyBands();
@@ -142,16 +115,10 @@
size_t num_bands_;
size_t num_split_frames_;
- bool mixed_low_pass_valid_;
- bool reference_copied_;
- AudioFrame::VADActivity activity_;
- const float* keyboard_data_;
std::unique_ptr<IFChannelBuffer> data_;
std::unique_ptr<IFChannelBuffer> split_data_;
std::unique_ptr<SplittingFilter> splitting_filter_;
- std::unique_ptr<ChannelBuffer<int16_t>> mixed_low_pass_channels_;
- std::unique_ptr<ChannelBuffer<int16_t>> low_pass_reference_channels_;
std::unique_ptr<IFChannelBuffer> input_buffer_;
std::unique_ptr<IFChannelBuffer> output_buffer_;
std::unique_ptr<ChannelBuffer<float>> process_buffer_;
diff --git a/modules/audio_processing/audio_buffer_unittest.cc b/modules/audio_processing/audio_buffer_unittest.cc
index 5c23159..b884799 100644
--- a/modules/audio_processing/audio_buffer_unittest.cc
+++ b/modules/audio_processing/audio_buffer_unittest.cc
@@ -21,10 +21,6 @@
const size_t kMono = 1u;
void ExpectNumChannels(const AudioBuffer& ab, size_t num_channels) {
- EXPECT_EQ(ab.data()->num_channels(), num_channels);
- EXPECT_EQ(ab.data_f()->num_channels(), num_channels);
- EXPECT_EQ(ab.split_data()->num_channels(), num_channels);
- EXPECT_EQ(ab.split_data_f()->num_channels(), num_channels);
EXPECT_EQ(ab.num_channels(), num_channels);
}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 9b4ae81..804802f 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -949,6 +949,7 @@
RecordUnprocessedCaptureStream(src);
}
+ capture_.keyboard_info.Extract(src, formats_.api_format.input_stream());
capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
RETURN_ON_ERR(ProcessCaptureStreamLocked());
capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
@@ -1243,11 +1244,14 @@
RecordUnprocessedCaptureStream(*frame);
}
+ capture_.vad_activity = frame->vad_activity_;
capture_.capture_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessCaptureStreamLocked());
- capture_.capture_audio->InterleaveTo(
- frame, submodule_states_.CaptureMultiBandProcessingActive() ||
- submodule_states_.CaptureFullBandProcessingActive());
+ if (submodule_states_.CaptureMultiBandProcessingActive() ||
+ submodule_states_.CaptureFullBandProcessingActive()) {
+ capture_.capture_audio->InterleaveTo(frame);
+ }
+ frame->vad_activity_ = capture_.vad_activity;
if (aec_dump_) {
RecordProcessedCaptureStream(*frame);
@@ -1361,7 +1365,8 @@
}
if (public_submodules_->noise_suppression->is_enabled()) {
- capture_buffer->CopyLowPassToReference();
+ private_submodules_->echo_control_mobile->CopyLowPassReference(
+ capture_buffer);
}
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
@@ -1393,7 +1398,15 @@
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
}
- public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer);
+ if (public_submodules_->voice_detection->is_enabled() &&
+ !public_submodules_->voice_detection->using_external_vad()) {
+ bool voice_active =
+ public_submodules_->voice_detection->ProcessCaptureAudio(
+ capture_buffer);
+ capture_.vad_activity =
+ voice_active ? AudioFrame::kVadActive : AudioFrame::kVadPassive;
+ }
+
if (config_.voice_detection.enabled) {
private_submodules_->voice_detector->ProcessCaptureAudio(capture_buffer);
capture_.stats.voice_detected =
@@ -1440,8 +1453,9 @@
capture_buffer->channels_f()[0], capture_buffer->num_frames(),
capture_buffer->num_channels(),
capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
- capture_buffer->num_frames_per_band(), capture_buffer->keyboard_data(),
- capture_buffer->num_keyboard_frames(), voice_probability,
+ capture_buffer->num_frames_per_band(),
+ capture_.keyboard_info.keyboard_data,
+ capture_.keyboard_info.num_keyboard_frames, voice_probability,
capture_.key_pressed);
}
@@ -1598,9 +1612,10 @@
render_.render_audio->DeinterleaveFrom(frame);
RETURN_ON_ERR(ProcessRenderStreamLocked());
- render_.render_audio->InterleaveTo(
- frame, submodule_states_.RenderMultiBandProcessingActive() ||
- submodule_states_.RenderFullBandProcessingActive());
+ if (submodule_states_.RenderMultiBandProcessingActive() ||
+ submodule_states_.RenderFullBandProcessingActive()) {
+ render_.render_audio->InterleaveTo(frame);
+ }
return kNoError;
}
@@ -2117,6 +2132,17 @@
AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default;
+void AudioProcessingImpl::ApmCaptureState::KeyboardInfo::Extract(
+ const float* const* data,
+ const StreamConfig& stream_config) {
+ if (stream_config.has_keyboard()) {
+ keyboard_data = data[stream_config.num_channels()];
+ } else {
+ keyboard_data = NULL;
+ }
+ num_keyboard_frames = stream_config.num_frames();
+}
+
AudioProcessingImpl::ApmRenderState::ApmRenderState() = default;
AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default;
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 05dbb50..1539cd5 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -394,6 +394,12 @@
int playout_volume;
int prev_playout_volume;
AudioProcessingStats stats;
+ struct KeyboardInfo {
+ void Extract(const float* const* data, const StreamConfig& stream_config);
+ size_t num_keyboard_frames = 0;
+ const float* keyboard_data = nullptr;
+ } keyboard_info;
+ AudioFrame::VADActivity vad_activity = AudioFrame::kVadUnknown;
} capture_ RTC_GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {
diff --git a/modules/audio_processing/echo_control_mobile_impl.cc b/modules/audio_processing/echo_control_mobile_impl.cc
index 69dfafe..c8084ea 100644
--- a/modules/audio_processing/echo_control_mobile_impl.cc
+++ b/modules/audio_processing/echo_control_mobile_impl.cc
@@ -101,7 +101,10 @@
};
EchoControlMobileImpl::EchoControlMobileImpl()
- : routing_mode_(kSpeakerphone), comfort_noise_enabled_(false) {}
+ : routing_mode_(kSpeakerphone), comfort_noise_enabled_(false) {
+ low_pass_reference_[0].fill(0);
+ low_pass_reference_[1].fill(0);
+}
EchoControlMobileImpl::~EchoControlMobileImpl() {}
@@ -168,7 +171,9 @@
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
// TODO(ajm): improve how this works, possibly inside AECM.
// This is kind of hacked up.
- const int16_t* noisy = audio->low_pass_reference(capture);
+ RTC_DCHECK_LT(capture, low_pass_reference_.size());
+ const int16_t* noisy =
+ reference_copied_ ? low_pass_reference_[capture].data() : nullptr;
const int16_t* clean = audio->split_bands_const(capture)[kBand0To8kHz];
if (noisy == NULL) {
noisy = clean;
@@ -195,6 +200,16 @@
return AudioProcessing::kNoError;
}
+void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) {
+ RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size());
+ reference_copied_ = true;
+ for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
+ memcpy(low_pass_reference_[capture].data(),
+ audio->split_bands_const(capture)[kBand0To8kHz],
+ audio->num_frames_per_band() * sizeof(int16_t));
+ }
+}
+
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
if (MapSetting(mode) == -1) {
return AudioProcessing::kBadParameterError;
@@ -219,6 +234,9 @@
void EchoControlMobileImpl::Initialize(int sample_rate_hz,
size_t num_reverse_channels,
size_t num_output_channels) {
+ low_pass_reference_[0].fill(0);
+ low_pass_reference_[1].fill(0);
+
stream_properties_.reset(new StreamProperties(
sample_rate_hz, num_reverse_channels, num_output_channels));
diff --git a/modules/audio_processing/echo_control_mobile_impl.h b/modules/audio_processing/echo_control_mobile_impl.h
index d84a15e..718819d 100644
--- a/modules/audio_processing/echo_control_mobile_impl.h
+++ b/modules/audio_processing/echo_control_mobile_impl.h
@@ -54,6 +54,7 @@
void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
+ void CopyLowPassReference(AudioBuffer* audio);
void Initialize(int sample_rate_hz,
size_t num_reverse_channels,
@@ -78,6 +79,8 @@
std::vector<std::unique_ptr<Canceller>> cancellers_;
std::unique_ptr<StreamProperties> stream_properties_;
+ std::array<std::array<int16_t, 160>, 2> low_pass_reference_;
+ bool reference_copied_ = false;
};
} // namespace webrtc
diff --git a/modules/audio_processing/gain_control_impl.cc b/modules/audio_processing/gain_control_impl.cc
index 2ca522c..5855943 100644
--- a/modules/audio_processing/gain_control_impl.cc
+++ b/modules/audio_processing/gain_control_impl.cc
@@ -120,10 +120,28 @@
std::vector<int16_t>* packed_buffer) {
RTC_DCHECK_GE(160, audio->num_frames_per_band());
+ std::array<int16_t, 160> mixed_low_pass_data;
+ rtc::ArrayView<const int16_t> mixed_low_pass;
+ if (audio->num_proc_channels() == 1) {
+ mixed_low_pass =
+ rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
+ audio->num_frames_per_band());
+ } else {
+ const int num_channels = static_cast<int>(audio->num_channels());
+ for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
+ int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
+ for (int j = 1; j < num_channels; ++j) {
+ value += audio->split_channels_const(kBand0To8kHz)[j][i];
+ }
+ mixed_low_pass_data[i] = value / num_channels;
+ }
+ mixed_low_pass = rtc::ArrayView<const int16_t>(
+ mixed_low_pass_data.data(), audio->num_frames_per_band());
+ }
+
packed_buffer->clear();
- packed_buffer->insert(
- packed_buffer->end(), audio->mixed_low_pass_data(),
- (audio->mixed_low_pass_data() + audio->num_frames_per_band()));
+ packed_buffer->insert(packed_buffer->end(), mixed_low_pass.data(),
+ (mixed_low_pass.data() + audio->num_frames_per_band()));
}
int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
diff --git a/modules/audio_processing/voice_detection_impl.cc b/modules/audio_processing/voice_detection_impl.cc
index 7bf6c4a..0263de4 100644
--- a/modules/audio_processing/voice_detection_impl.cc
+++ b/modules/audio_processing/voice_detection_impl.cc
@@ -54,30 +54,42 @@
set_likelihood(likelihood_);
}
-void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
+bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
rtc::CritScope cs(crit_);
- if (!enabled_) {
- return;
- }
- if (using_external_vad_) {
- using_external_vad_ = false;
- return;
- }
+ RTC_DCHECK(enabled_);
RTC_DCHECK_GE(160, audio->num_frames_per_band());
- // TODO(ajm): concatenate data in frame buffer here.
- int vad_ret =
- WebRtcVad_Process(vad_->state(), sample_rate_hz_,
- audio->mixed_low_pass_data(), frame_size_samples_);
+ std::array<int16_t, 160> mixed_low_pass_data;
+ rtc::ArrayView<const int16_t> mixed_low_pass;
+ if (audio->num_proc_channels() == 1) {
+ mixed_low_pass =
+ rtc::ArrayView<const int16_t>(audio->split_bands_const(0)[kBand0To8kHz],
+ audio->num_frames_per_band());
+ } else {
+ const int num_channels = static_cast<int>(audio->num_channels());
+ for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
+ int32_t value = audio->split_channels_const(kBand0To8kHz)[0][i];
+ for (int j = 1; j < num_channels; ++j) {
+ value += audio->split_channels_const(kBand0To8kHz)[j][i];
+ }
+ mixed_low_pass_data[i] = value / num_channels;
+ }
+ mixed_low_pass = rtc::ArrayView<const int16_t>(
+ mixed_low_pass_data.data(), audio->num_frames_per_band());
+ }
+
+ int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
+ mixed_low_pass.data(), frame_size_samples_);
if (vad_ret == 0) {
stream_has_voice_ = false;
- audio->set_activity(AudioFrame::kVadPassive);
+ return false;
} else if (vad_ret == 1) {
stream_has_voice_ = true;
- audio->set_activity(AudioFrame::kVadActive);
} else {
RTC_NOTREACHED();
}
+
+ return stream_has_voice_;
}
int VoiceDetectionImpl::Enable(bool enable) {
diff --git a/modules/audio_processing/voice_detection_impl.h b/modules/audio_processing/voice_detection_impl.h
index 4007f67..7ee303f 100644
--- a/modules/audio_processing/voice_detection_impl.h
+++ b/modules/audio_processing/voice_detection_impl.h
@@ -31,7 +31,14 @@
// TODO(peah): Fold into ctor, once public API is removed.
void Initialize(int sample_rate_hz);
- void ProcessCaptureAudio(AudioBuffer* audio);
+
+ // Returns the VAD activity.
+ bool ProcessCaptureAudio(AudioBuffer* audio);
+
+ bool using_external_vad() const {
+ rtc::CritScope cs(crit_);
+ return using_external_vad_;
+ }
// VoiceDetection implementation.
int Enable(bool enable) override;