Delete the voice_detection() submodule accessor
The new configuration path is via AudioProcessing::ApplyConfig and
AudioProcessing::GetStatistics.
ApmTest.Process passes with unchanged reference files if
audio_processing_impl initializes the VAD with
VoiceDetection::kLowLikelihood instead of kVeryLowLikelihood;
this was verified by testing this CL with that modification.
Bug: webrtc:9878
Change-Id: I4d08df37a07e5c72feeec02a07d6b9435f917d72
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155445
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29395}
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index d2be4f8..4ca9188 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -156,8 +156,8 @@
"transient/wpd_tree.h",
"typing_detection.cc",
"typing_detection.h",
- "voice_detection_impl.cc",
- "voice_detection_impl.h",
+ "voice_detection.cc",
+ "voice_detection.h",
]
defines = []
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index b1187fa..c661848 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -40,7 +40,7 @@
#include "modules/audio_processing/noise_suppression_proxy.h"
#include "modules/audio_processing/residual_echo_detector.h"
#include "modules/audio_processing/transient/transient_suppressor.h"
-#include "modules/audio_processing/voice_detection_impl.h"
+#include "modules/audio_processing/voice_detection.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
#include "rtc_base/constructor_magic.h"
@@ -165,8 +165,7 @@
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
bool echo_controller_enabled,
- bool voice_activity_detector_enabled,
- bool private_voice_detector_enabled,
+ bool voice_detector_enabled,
bool level_estimator_enabled,
bool transient_suppressor_enabled) {
bool changed = false;
@@ -183,10 +182,7 @@
changed |= (pre_amplifier_enabled_ != pre_amplifier_enabled);
changed |= (echo_controller_enabled != echo_controller_enabled_);
changed |= (level_estimator_enabled != level_estimator_enabled_);
- changed |=
- (voice_activity_detector_enabled != voice_activity_detector_enabled_);
- changed |=
- (private_voice_detector_enabled != private_voice_detector_enabled_);
+ changed |= (voice_detector_enabled != voice_detector_enabled_);
changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
if (changed) {
high_pass_filter_enabled_ = high_pass_filter_enabled;
@@ -199,8 +195,7 @@
pre_amplifier_enabled_ = pre_amplifier_enabled;
echo_controller_enabled_ = echo_controller_enabled;
level_estimator_enabled_ = level_estimator_enabled;
- voice_activity_detector_enabled_ = voice_activity_detector_enabled;
- private_voice_detector_enabled_ = private_voice_detector_enabled;
+ voice_detector_enabled_ = voice_detector_enabled;
transient_suppressor_enabled_ = transient_suppressor_enabled;
}
@@ -211,8 +206,7 @@
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive()
const {
- return CaptureMultiBandProcessingActive() ||
- voice_activity_detector_enabled_ || private_voice_detector_enabled_;
+ return CaptureMultiBandProcessingActive() || voice_detector_enabled_;
}
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
@@ -263,7 +257,6 @@
std::unique_ptr<LevelEstimatorImpl> level_estimator;
std::unique_ptr<NoiseSuppressionImpl> noise_suppression;
std::unique_ptr<NoiseSuppressionProxy> noise_suppression_proxy;
- std::unique_ptr<VoiceDetectionImpl> voice_detection;
std::unique_ptr<GainControlImpl> gain_control;
std::unique_ptr<GainControlForExperimentalAgc>
gain_control_for_experimental_agc;
@@ -295,7 +288,7 @@
std::unique_ptr<GainApplier> pre_amplifier;
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer;
std::unique_ptr<LevelEstimatorImpl> output_level_estimator;
- std::unique_ptr<VoiceDetectionImpl> voice_detector;
+ std::unique_ptr<VoiceDetection> voice_detector;
};
AudioProcessingBuilder::AudioProcessingBuilder() = default;
@@ -415,8 +408,6 @@
new NoiseSuppressionImpl(&crit_capture_));
public_submodules_->noise_suppression_proxy.reset(new NoiseSuppressionProxy(
this, public_submodules_->noise_suppression.get()));
- public_submodules_->voice_detection.reset(
- new VoiceDetectionImpl(&crit_capture_));
public_submodules_->gain_control_for_experimental_agc.reset(
new GainControlForExperimentalAgc(
public_submodules_->gain_control.get()));
@@ -556,11 +547,7 @@
InitializeHighPassFilter();
public_submodules_->noise_suppression->Initialize(num_proc_channels(),
proc_sample_rate_hz());
- public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
- if (private_submodules_->voice_detector) {
- private_submodules_->voice_detector->Initialize(
- proc_split_sample_rate_hz());
- }
+ InitializeVoiceDetector();
public_submodules_->level_estimator->Initialize();
InitializeResidualEchoDetector();
InitializeEchoController();
@@ -702,6 +689,9 @@
config_.gain_controller1.analog_level_maximum !=
config.gain_controller1.analog_level_maximum;
+ const bool voice_detection_config_changed =
+ config_.voice_detection.enabled != config.voice_detection.enabled;
+
config_ = config;
if (aec_config_changed) {
@@ -745,14 +735,8 @@
private_submodules_->output_level_estimator->Enable(true);
}
- if (config_.voice_detection.enabled && !private_submodules_->voice_detector) {
- private_submodules_->voice_detector.reset(
- new VoiceDetectionImpl(&crit_capture_));
- private_submodules_->voice_detector->Enable(true);
- private_submodules_->voice_detector->set_likelihood(
- VoiceDetection::kVeryLowLikelihood);
- private_submodules_->voice_detector->Initialize(
- proc_split_sample_rate_hz());
+ if (voice_detection_config_changed) {
+ InitializeVoiceDetector();
}
// Reinitialization must happen after all submodule configuration to avoid
@@ -1276,14 +1260,17 @@
RecordUnprocessedCaptureStream(*frame);
}
- capture_.vad_activity = frame->vad_activity_;
capture_.capture_audio->CopyFrom(frame);
RETURN_ON_ERR(ProcessCaptureStreamLocked());
if (submodule_states_.CaptureMultiBandProcessingActive() ||
submodule_states_.CaptureFullBandProcessingActive()) {
capture_.capture_audio->CopyTo(frame);
}
- frame->vad_activity_ = capture_.vad_activity;
+ if (capture_.stats.voice_detected) {
+ frame->vad_activity_ = *capture_.stats.voice_detected
+ ? AudioFrame::kVadActive
+ : AudioFrame::kVadPassive;
+ }
if (aec_dump_) {
RecordProcessedCaptureStream(*frame);
@@ -1432,19 +1419,10 @@
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
}
- if (public_submodules_->voice_detection->is_enabled() &&
- !public_submodules_->voice_detection->using_external_vad()) {
- bool voice_active =
- public_submodules_->voice_detection->ProcessCaptureAudio(
- capture_buffer);
- capture_.vad_activity =
- voice_active ? AudioFrame::kVadActive : AudioFrame::kVadPassive;
- }
-
if (config_.voice_detection.enabled) {
- private_submodules_->voice_detector->ProcessCaptureAudio(capture_buffer);
capture_.stats.voice_detected =
- private_submodules_->voice_detector->stream_has_voice();
+ private_submodules_->voice_detector->ProcessCaptureAudio(
+ capture_buffer);
} else {
capture_.stats.voice_detected = absl::nullopt;
}
@@ -1817,10 +1795,6 @@
return public_submodules_->noise_suppression_proxy.get();
}
-VoiceDetection* AudioProcessingImpl::voice_detection() const {
- return public_submodules_->voice_detection.get();
-}
-
void AudioProcessingImpl::MutateConfig(
rtc::FunctionView<void(AudioProcessing::Config*)> mutator) {
rtc::CritScope cs_render(&crit_render_);
@@ -1845,7 +1819,6 @@
public_submodules_->gain_control->is_enabled(),
config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
capture_nonlocked_.echo_controller_enabled,
- public_submodules_->voice_detection->is_enabled(),
config_.voice_detection.enabled,
public_submodules_->level_estimator->is_enabled(),
capture_.transient_suppressor_enabled);
@@ -1871,6 +1844,14 @@
}
}
+void AudioProcessingImpl::InitializeVoiceDetector() {
+ if (config_.voice_detection.enabled) {
+ private_submodules_->voice_detector = std::make_unique<VoiceDetection>(
+ proc_split_sample_rate_hz(), VoiceDetection::kVeryLowLikelihood);
+ } else {
+ private_submodules_->voice_detector.reset();
+ }
+}
void AudioProcessingImpl::InitializeEchoController() {
bool use_echo_controller =
echo_control_factory_ ||
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 4d5b368..eb75362 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -122,7 +122,6 @@
GainControl* gain_control() const override;
LevelEstimator* level_estimator() const override;
NoiseSuppression* noise_suppression() const override;
- VoiceDetection* voice_detection() const override;
// TODO(peah): Remove MutateConfig once the new API allows that.
void MutateConfig(rtc::FunctionView<void(AudioProcessing::Config*)> mutator);
@@ -182,8 +181,7 @@
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
bool echo_controller_enabled,
- bool voice_activity_detector_enabled,
- bool private_voice_detector_enabled,
+ bool voice_detector_enabled,
bool level_estimator_enabled,
bool transient_suppressor_enabled);
bool CaptureMultiBandSubModulesActive() const;
@@ -209,8 +207,7 @@
bool pre_amplifier_enabled_ = false;
bool echo_controller_enabled_ = false;
bool level_estimator_enabled_ = false;
- bool voice_activity_detector_enabled_ = false;
- bool private_voice_detector_enabled_ = false;
+ bool voice_detector_enabled_ = false;
bool transient_suppressor_enabled_ = false;
bool first_update_ = true;
};
@@ -239,6 +236,7 @@
void InitializeResidualEchoDetector()
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
void InitializeHighPassFilter() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
+ void InitializeVoiceDetector() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
void InitializeEchoController()
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
void InitializeGainController2() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
@@ -405,7 +403,6 @@
size_t num_keyboard_frames = 0;
const float* keyboard_data = nullptr;
} keyboard_info;
- AudioFrame::VADActivity vad_activity = AudioFrame::kVadUnknown;
} capture_ RTC_GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {
diff --git a/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
index 9182d2c..c974a3c 100644
--- a/modules/audio_processing/audio_processing_impl_locking_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
@@ -595,7 +595,6 @@
// The below return values are not testable.
apm_->noise_suppression()->speech_probability();
- apm_->voice_detection()->is_enabled();
apm_->GetStatistics(/*has_remote_tracks=*/true);
}
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index 14ca329..2b8abd9 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -193,12 +193,11 @@
apm_config.high_pass_filter.enabled = true;
apm_config.level_estimation.enabled = true;
+ apm_config.voice_detection.enabled = true;
ap->ApplyConfig(apm_config);
EXPECT_NOERR(ap->level_estimator()->Enable(true));
EXPECT_NOERR(ap->noise_suppression()->Enable(true));
-
- EXPECT_NOERR(ap->voice_detection()->Enable(true));
}
// These functions are only used by ApmTest.Process.
@@ -1114,63 +1113,6 @@
EXPECT_EQ(90, apm_->level_estimator()->RMS());
}
-TEST_F(ApmTest, VoiceDetection) {
- // Test external VAD
- EXPECT_EQ(apm_->kNoError,
- apm_->voice_detection()->set_stream_has_voice(true));
- EXPECT_TRUE(apm_->voice_detection()->stream_has_voice());
- EXPECT_EQ(apm_->kNoError,
- apm_->voice_detection()->set_stream_has_voice(false));
- EXPECT_FALSE(apm_->voice_detection()->stream_has_voice());
-
- // Test valid likelihoods
- VoiceDetection::Likelihood likelihood[] = {
- VoiceDetection::kVeryLowLikelihood, VoiceDetection::kLowLikelihood,
- VoiceDetection::kModerateLikelihood, VoiceDetection::kHighLikelihood};
- for (size_t i = 0; i < arraysize(likelihood); i++) {
- EXPECT_EQ(apm_->kNoError,
- apm_->voice_detection()->set_likelihood(likelihood[i]));
- EXPECT_EQ(likelihood[i], apm_->voice_detection()->likelihood());
- }
-
- /* TODO(bjornv): Enable once VAD supports other frame lengths than 10 ms
- // Test invalid frame sizes
- EXPECT_EQ(apm_->kBadParameterError,
- apm_->voice_detection()->set_frame_size_ms(12));
-
- // Test valid frame sizes
- for (int i = 10; i <= 30; i += 10) {
- EXPECT_EQ(apm_->kNoError,
- apm_->voice_detection()->set_frame_size_ms(i));
- EXPECT_EQ(i, apm_->voice_detection()->frame_size_ms());
- }
- */
-
- // Turn VAD on/off
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
- EXPECT_TRUE(apm_->voice_detection()->is_enabled());
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
- EXPECT_FALSE(apm_->voice_detection()->is_enabled());
-
- // Test that AudioFrame activity is maintained when VAD is disabled.
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
- AudioFrame::VADActivity activity[] = {
- AudioFrame::kVadActive, AudioFrame::kVadPassive, AudioFrame::kVadUnknown};
- for (size_t i = 0; i < arraysize(activity); i++) {
- frame_->vad_activity_ = activity[i];
- EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
- EXPECT_EQ(activity[i], frame_->vad_activity_);
- }
-
- // Test that AudioFrame activity is set when VAD is enabled.
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
- frame_->vad_activity_ = AudioFrame::kVadUnknown;
- EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
- EXPECT_NE(AudioFrame::kVadUnknown, frame_->vad_activity_);
-
- // TODO(bjornv): Add tests for streamed voice; stream_has_voice()
-}
-
TEST_F(ApmTest, AllProcessingDisabledByDefault) {
AudioProcessing::Config config = apm_->GetConfig();
EXPECT_FALSE(config.echo_canceller.enabled);
@@ -1180,7 +1122,6 @@
EXPECT_FALSE(apm_->gain_control()->is_enabled());
EXPECT_FALSE(apm_->level_estimator()->is_enabled());
EXPECT_FALSE(apm_->noise_suppression()->is_enabled());
- EXPECT_FALSE(apm_->voice_detection()->is_enabled());
}
TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) {
@@ -1282,16 +1223,7 @@
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
- // 3. Only VAD is enabled...
- SetFrameTo(frame_, 1000);
- frame_copy.CopyFrom(*frame_);
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
- EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
- EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
- EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
-
- // 4. Only GetStatistics-reporting VAD is enabled...
+ // 3. Only GetStatistics-reporting VAD is enabled...
SetFrameTo(frame_, 1000);
frame_copy.CopyFrom(*frame_);
auto apm_config = apm_->GetConfig();
@@ -1303,18 +1235,16 @@
apm_config.voice_detection.enabled = false;
apm_->ApplyConfig(apm_config);
- // 5. Both VADs and the level estimator are enabled...
+ // 4. Both the VAD and the level estimator are enabled...
SetFrameTo(frame_, 1000);
frame_copy.CopyFrom(*frame_);
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
apm_config.voice_detection.enabled = true;
apm_->ApplyConfig(apm_config);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
apm_config.voice_detection.enabled = false;
apm_->ApplyConfig(apm_config);
@@ -1652,18 +1582,15 @@
if (apm_->gain_control()->stream_is_saturated()) {
is_saturated_count++;
}
- if (apm_->voice_detection()->stream_has_voice()) {
- has_voice_count++;
- EXPECT_EQ(AudioFrame::kVadActive, frame_->vad_activity_);
- } else {
- EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_);
- }
-
- ns_speech_prob_average += apm_->noise_suppression()->speech_probability();
AudioProcessingStats stats =
apm_->GetStatistics(/*has_remote_tracks=*/false);
+ EXPECT_TRUE(stats.voice_detected);
+ EXPECT_TRUE(stats.output_rms_dbfs);
+ has_voice_count += *stats.voice_detected ? 1 : 0;
rms_dbfs_average += *stats.output_rms_dbfs;
+ ns_speech_prob_average += apm_->noise_suppression()->speech_probability();
+
size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
size_t write_count =
fwrite(frame_->data(), sizeof(int16_t), frame_size, out_file_);
@@ -2566,7 +2493,6 @@
EXPECT_EQ(apm->gain_control()->Enable(false), 0);
EXPECT_EQ(apm->level_estimator()->Enable(false), 0);
EXPECT_EQ(apm->noise_suppression()->Enable(false), 0);
- EXPECT_EQ(apm->voice_detection()->Enable(false), 0);
return apm;
}
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index e063e95..ec2f32b 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -53,7 +53,6 @@
class NoiseSuppression;
class CustomAudioAnalyzer;
class CustomProcessing;
-class VoiceDetection;
// Use to enable the extended filter mode in the AEC, along with robustness
// measures around the reported system delays. It comes with a significant
@@ -287,7 +286,10 @@
Level level = kModerate;
} noise_suppression;
- // Enables reporting of |has_voice| in webrtc::AudioProcessingStats.
+ // Enables reporting of |voice_detected| in webrtc::AudioProcessingStats.
+ // In addition to |voice_detected|, VAD decision is provided through the
+ // |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will
+ // be modified to reflect the current decision.
struct VoiceDetection {
bool enabled = false;
} voice_detection;
@@ -685,7 +687,6 @@
virtual GainControl* gain_control() const = 0;
virtual LevelEstimator* level_estimator() const = 0;
virtual NoiseSuppression* noise_suppression() const = 0;
- virtual VoiceDetection* voice_detection() const = 0;
// Returns the last applied configuration.
virtual AudioProcessing::Config GetConfig() const = 0;
@@ -981,56 +982,6 @@
virtual Metrics GetMetrics() const = 0;
};
-// The voice activity detection (VAD) component analyzes the stream to
-// determine if voice is present. A facility is also provided to pass in an
-// external VAD decision.
-//
-// In addition to |stream_has_voice()| the VAD decision is provided through the
-// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will be
-// modified to reflect the current decision.
-class VoiceDetection {
- public:
- virtual int Enable(bool enable) = 0;
- virtual bool is_enabled() const = 0;
-
- // Returns true if voice is detected in the current frame. Should be called
- // after |ProcessStream()|.
- virtual bool stream_has_voice() const = 0;
-
- // Some of the APM functionality requires a VAD decision. In the case that
- // a decision is externally available for the current frame, it can be passed
- // in here, before |ProcessStream()| is called.
- //
- // VoiceDetection does _not_ need to be enabled to use this. If it happens to
- // be enabled, detection will be skipped for any frame in which an external
- // VAD decision is provided.
- virtual int set_stream_has_voice(bool has_voice) = 0;
-
- // Specifies the likelihood that a frame will be declared to contain voice.
- // A higher value makes it more likely that speech will not be clipped, at
- // the expense of more noise being detected as voice.
- enum Likelihood {
- kVeryLowLikelihood,
- kLowLikelihood,
- kModerateLikelihood,
- kHighLikelihood
- };
-
- virtual int set_likelihood(Likelihood likelihood) = 0;
- virtual Likelihood likelihood() const = 0;
-
- // Sets the |size| of the frames in ms on which the VAD will operate. Larger
- // frames will improve detection accuracy, but reduce the frequency of
- // updates.
- //
- // This does not impact the size of frames passed to |ProcessStream()|.
- virtual int set_frame_size_ms(int size) = 0;
- virtual int frame_size_ms() const = 0;
-
- protected:
- virtual ~VoiceDetection() {}
-};
-
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
diff --git a/modules/audio_processing/include/mock_audio_processing.h b/modules/audio_processing/include/mock_audio_processing.h
index 1c08726..e989fea 100644
--- a/modules/audio_processing/include/mock_audio_processing.h
+++ b/modules/audio_processing/include/mock_audio_processing.h
@@ -91,26 +91,12 @@
MOCK_METHOD1(SetAudioBufferDelay, void(size_t delay_ms));
};
-class MockVoiceDetection : public VoiceDetection {
- public:
- virtual ~MockVoiceDetection() {}
- MOCK_METHOD1(Enable, int(bool enable));
- MOCK_CONST_METHOD0(is_enabled, bool());
- MOCK_CONST_METHOD0(stream_has_voice, bool());
- MOCK_METHOD1(set_stream_has_voice, int(bool has_voice));
- MOCK_METHOD1(set_likelihood, int(Likelihood likelihood));
- MOCK_CONST_METHOD0(likelihood, Likelihood());
- MOCK_METHOD1(set_frame_size_ms, int(int size));
- MOCK_CONST_METHOD0(frame_size_ms, int());
-};
-
class MockAudioProcessing : public ::testing::NiceMock<AudioProcessing> {
public:
MockAudioProcessing()
: gain_control_(new ::testing::NiceMock<MockGainControl>()),
level_estimator_(new ::testing::NiceMock<MockLevelEstimator>()),
- noise_suppression_(new ::testing::NiceMock<MockNoiseSuppression>()),
- voice_detection_(new ::testing::NiceMock<MockVoiceDetection>()) {}
+ noise_suppression_(new ::testing::NiceMock<MockNoiseSuppression>()) {}
virtual ~MockAudioProcessing() {}
@@ -183,9 +169,6 @@
virtual MockNoiseSuppression* noise_suppression() const {
return noise_suppression_.get();
}
- virtual MockVoiceDetection* voice_detection() const {
- return voice_detection_.get();
- }
MOCK_CONST_METHOD0(GetConfig, AudioProcessing::Config());
@@ -193,7 +176,6 @@
std::unique_ptr<MockGainControl> gain_control_;
std::unique_ptr<MockLevelEstimator> level_estimator_;
std::unique_ptr<MockNoiseSuppression> noise_suppression_;
- std::unique_ptr<MockVoiceDetection> voice_detection_;
};
} // namespace test
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index e0b7730..5cda89a 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -455,6 +455,10 @@
apm_config.high_pass_filter.enabled = *settings_.use_hpf;
}
+ if (settings_.use_vad) {
+ apm_config.voice_detection.enabled = *settings_.use_vad;
+ }
+
if (settings_.use_refined_adaptive_filter) {
config.Set<RefinedAdaptiveFilter>(
new RefinedAdaptiveFilter(*settings_.use_refined_adaptive_filter));
@@ -502,10 +506,6 @@
RTC_CHECK_EQ(AudioProcessing::kNoError,
ap_->level_estimator()->Enable(*settings_.use_le));
}
- if (settings_.use_vad) {
- RTC_CHECK_EQ(AudioProcessing::kNoError,
- ap_->voice_detection()->Enable(*settings_.use_vad));
- }
if (settings_.use_agc_limiter) {
RTC_CHECK_EQ(AudioProcessing::kNoError, ap_->gain_control()->enable_limiter(
*settings_.use_agc_limiter));
@@ -526,13 +526,6 @@
ap_->gain_control()->set_mode(
static_cast<webrtc::GainControl::Mode>(*settings_.agc_mode)));
}
-
- if (settings_.vad_likelihood) {
- RTC_CHECK_EQ(AudioProcessing::kNoError,
- ap_->voice_detection()->set_likelihood(
- static_cast<webrtc::VoiceDetection::Likelihood>(
- *settings_.vad_likelihood)));
- }
if (settings_.ns_level) {
RTC_CHECK_EQ(
AudioProcessing::kNoError,
diff --git a/modules/audio_processing/test/audio_processing_simulator.h b/modules/audio_processing/test/audio_processing_simulator.h
index 270cdcc..c7087be 100644
--- a/modules/audio_processing/test/audio_processing_simulator.h
+++ b/modules/audio_processing/test/audio_processing_simulator.h
@@ -79,7 +79,6 @@
AudioProcessing::Config::GainController2::LevelEstimator
agc2_adaptive_level_estimator;
absl::optional<float> pre_amplifier_gain_factor;
- absl::optional<int> vad_likelihood;
absl::optional<int> ns_level;
absl::optional<int> maximum_internal_processing_rate;
absl::optional<bool> use_refined_adaptive_filter;
diff --git a/modules/audio_processing/test/audioproc_float_impl.cc b/modules/audio_processing/test/audioproc_float_impl.cc
index d24b881..2b24823 100644
--- a/modules/audio_processing/test/audioproc_float_impl.cc
+++ b/modules/audio_processing/test/audioproc_float_impl.cc
@@ -186,10 +186,6 @@
kParameterNotSpecifiedValue,
"Pre-amplifier gain factor (linear) to apply");
ABSL_FLAG(int,
- vad_likelihood,
- kParameterNotSpecifiedValue,
- "Specify the VAD likelihood (0-3)");
-ABSL_FLAG(int,
ns_level,
kParameterNotSpecifiedValue,
"Specify the NS level (0-3)");
@@ -423,8 +419,6 @@
absl::GetFlag(FLAGS_agc2_adaptive_level_estimator));
SetSettingIfSpecified(absl::GetFlag(FLAGS_pre_amplifier_gain_factor),
&settings.pre_amplifier_gain_factor);
- SetSettingIfSpecified(absl::GetFlag(FLAGS_vad_likelihood),
- &settings.vad_likelihood);
SetSettingIfSpecified(absl::GetFlag(FLAGS_ns_level), &settings.ns_level);
SetSettingIfSpecified(absl::GetFlag(FLAGS_maximum_internal_processing_rate),
&settings.maximum_internal_processing_rate);
@@ -556,11 +550,6 @@
"Error: --agc2_fixed_gain_db must be specified between 0 and 90.\n");
ReportConditionalErrorAndExit(
- settings.vad_likelihood &&
- ((*settings.vad_likelihood) < 0 || (*settings.vad_likelihood) > 3),
- "Error: --vad_likelihood must be specified between 0 and 3.\n");
-
- ReportConditionalErrorAndExit(
settings.ns_level &&
((*settings.ns_level) < 0 || (*settings.ns_level) > 3),
"Error: --ns_level must be specified between 0 and 3.\n");
diff --git a/modules/audio_processing/voice_detection.cc b/modules/audio_processing/voice_detection.cc
new file mode 100644
index 0000000..2774e35
--- /dev/null
+++ b/modules/audio_processing/voice_detection.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/voice_detection.h"
+
+#include "api/audio/audio_frame.h"
+#include "common_audio/vad/include/webrtc_vad.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+class VoiceDetection::Vad {
+ public:
+ Vad() {
+ state_ = WebRtcVad_Create();
+ RTC_CHECK(state_);
+ int error = WebRtcVad_Init(state_);
+ RTC_DCHECK_EQ(0, error);
+ }
+ ~Vad() { WebRtcVad_Free(state_); }
+
+ Vad(Vad&) = delete;
+ Vad& operator=(Vad&) = delete;
+
+ VadInst* state() { return state_; }
+
+ private:
+ VadInst* state_ = nullptr;
+};
+
+VoiceDetection::VoiceDetection(int sample_rate_hz, Likelihood likelihood)
+ : sample_rate_hz_(sample_rate_hz),
+ frame_size_samples_(static_cast<size_t>(sample_rate_hz_ / 100)),
+ likelihood_(likelihood),
+ vad_(new Vad()) {
+ int mode = 2;
+ switch (likelihood) {
+ case VoiceDetection::kVeryLowLikelihood:
+ mode = 3;
+ break;
+ case VoiceDetection::kLowLikelihood:
+ mode = 2;
+ break;
+ case VoiceDetection::kModerateLikelihood:
+ mode = 1;
+ break;
+ case VoiceDetection::kHighLikelihood:
+ mode = 0;
+ break;
+ default:
+ RTC_NOTREACHED();
+ break;
+ }
+ int error = WebRtcVad_set_mode(vad_->state(), mode);
+ RTC_DCHECK_EQ(0, error);
+}
+
+VoiceDetection::~VoiceDetection() {}
+
+bool VoiceDetection::ProcessCaptureAudio(AudioBuffer* audio) {
+ RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
+ audio->num_frames_per_band());
+ std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
+ rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
+ audio->num_frames_per_band());
+ if (audio->num_channels() == 1) {
+ FloatS16ToS16(audio->split_bands_const(0)[kBand0To8kHz],
+ audio->num_frames_per_band(), mixed_low_pass_data.data());
+ } else {
+ const int num_channels = static_cast<int>(audio->num_channels());
+ for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
+ int32_t value =
+ FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[0][i]);
+ for (int j = 1; j < num_channels; ++j) {
+ value += FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[j][i]);
+ }
+ mixed_low_pass_data[i] = value / num_channels;
+ }
+ }
+
+ int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
+ mixed_low_pass.data(), frame_size_samples_);
+ RTC_DCHECK(vad_ret == 0 || vad_ret == 1);
+ return vad_ret == 0 ? false : true;
+}
+} // namespace webrtc
diff --git a/modules/audio_processing/voice_detection.h b/modules/audio_processing/voice_detection.h
new file mode 100644
index 0000000..79d44e6
--- /dev/null
+++ b/modules/audio_processing/voice_detection.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_
+#define MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_
+
+#include <stddef.h>
+
+#include <memory>
+
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+class AudioBuffer;
+
+// The voice activity detection (VAD) component analyzes the stream to
+// determine if voice is present.
+class VoiceDetection {
+ public:
+ // Specifies the likelihood that a frame will be declared to contain voice.
+ // A higher value makes it more likely that speech will not be clipped, at
+ // the expense of more noise being detected as voice.
+ enum Likelihood {
+ kVeryLowLikelihood,
+ kLowLikelihood,
+ kModerateLikelihood,
+ kHighLikelihood
+ };
+
+ VoiceDetection(int sample_rate_hz, Likelihood likelihood);
+ ~VoiceDetection();
+
+ VoiceDetection(VoiceDetection&) = delete;
+ VoiceDetection& operator=(VoiceDetection&) = delete;
+
+ // Returns true if voice is detected in the current frame.
+ bool ProcessCaptureAudio(AudioBuffer* audio);
+
+ Likelihood likelihood() const { return likelihood_; }
+
+ private:
+ class Vad;
+
+ int sample_rate_hz_;
+ size_t frame_size_samples_;
+ Likelihood likelihood_;
+ std::unique_ptr<Vad> vad_;
+};
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_
diff --git a/modules/audio_processing/voice_detection_impl.cc b/modules/audio_processing/voice_detection_impl.cc
deleted file mode 100644
index 80b633c..0000000
--- a/modules/audio_processing/voice_detection_impl.cc
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "modules/audio_processing/voice_detection_impl.h"
-
-#include "api/audio/audio_frame.h"
-#include "common_audio/vad/include/webrtc_vad.h"
-#include "modules/audio_processing/audio_buffer.h"
-#include "rtc_base/checks.h"
-#include "rtc_base/constructor_magic.h"
-
-namespace webrtc {
-class VoiceDetectionImpl::Vad {
- public:
- Vad() {
- state_ = WebRtcVad_Create();
- RTC_CHECK(state_);
- int error = WebRtcVad_Init(state_);
- RTC_DCHECK_EQ(0, error);
- }
- ~Vad() { WebRtcVad_Free(state_); }
- VadInst* state() { return state_; }
-
- private:
- VadInst* state_ = nullptr;
- RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
-};
-
-VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
- : crit_(crit) {
- RTC_DCHECK(crit);
-}
-
-VoiceDetectionImpl::~VoiceDetectionImpl() {}
-
-void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
- rtc::CritScope cs(crit_);
- sample_rate_hz_ = sample_rate_hz;
- std::unique_ptr<Vad> new_vad;
- if (enabled_) {
- new_vad.reset(new Vad());
- }
- vad_.swap(new_vad);
- using_external_vad_ = false;
- frame_size_samples_ =
- static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
- set_likelihood(likelihood_);
-}
-
-bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
- rtc::CritScope cs(crit_);
- RTC_DCHECK(enabled_);
-
- RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
- audio->num_frames_per_band());
- std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
- rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
- audio->num_frames_per_band());
- if (audio->num_channels() == 1) {
- FloatS16ToS16(audio->split_bands_const(0)[kBand0To8kHz],
- audio->num_frames_per_band(), mixed_low_pass_data.data());
- } else {
- const int num_channels = static_cast<int>(audio->num_channels());
- for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
- int32_t value =
- FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[0][i]);
- for (int j = 1; j < num_channels; ++j) {
- value += FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[j][i]);
- }
- mixed_low_pass_data[i] = value / num_channels;
- }
- }
-
- int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
- mixed_low_pass.data(), frame_size_samples_);
- if (vad_ret == 0) {
- stream_has_voice_ = false;
- return false;
- } else if (vad_ret == 1) {
- stream_has_voice_ = true;
- } else {
- RTC_NOTREACHED();
- }
-
- return stream_has_voice_;
-}
-
-int VoiceDetectionImpl::Enable(bool enable) {
- rtc::CritScope cs(crit_);
- if (enabled_ != enable) {
- enabled_ = enable;
- Initialize(sample_rate_hz_);
- }
- return AudioProcessing::kNoError;
-}
-
-bool VoiceDetectionImpl::is_enabled() const {
- rtc::CritScope cs(crit_);
- return enabled_;
-}
-
-int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
- rtc::CritScope cs(crit_);
- using_external_vad_ = true;
- stream_has_voice_ = has_voice;
- return AudioProcessing::kNoError;
-}
-
-bool VoiceDetectionImpl::stream_has_voice() const {
- rtc::CritScope cs(crit_);
- // TODO(ajm): enable this assertion?
- // RTC_DCHECK(using_external_vad_ || is_component_enabled());
- return stream_has_voice_;
-}
-
-int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
- rtc::CritScope cs(crit_);
- likelihood_ = likelihood;
- if (enabled_) {
- int mode = 2;
- switch (likelihood) {
- case VoiceDetection::kVeryLowLikelihood:
- mode = 3;
- break;
- case VoiceDetection::kLowLikelihood:
- mode = 2;
- break;
- case VoiceDetection::kModerateLikelihood:
- mode = 1;
- break;
- case VoiceDetection::kHighLikelihood:
- mode = 0;
- break;
- default:
- RTC_NOTREACHED();
- break;
- }
- int error = WebRtcVad_set_mode(vad_->state(), mode);
- RTC_DCHECK_EQ(0, error);
- }
- return AudioProcessing::kNoError;
-}
-
-VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
- rtc::CritScope cs(crit_);
- return likelihood_;
-}
-
-int VoiceDetectionImpl::set_frame_size_ms(int size) {
- rtc::CritScope cs(crit_);
- RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported.
- frame_size_ms_ = size;
- Initialize(sample_rate_hz_);
- return AudioProcessing::kNoError;
-}
-
-int VoiceDetectionImpl::frame_size_ms() const {
- rtc::CritScope cs(crit_);
- return frame_size_ms_;
-}
-} // namespace webrtc
diff --git a/modules/audio_processing/voice_detection_impl.h b/modules/audio_processing/voice_detection_impl.h
deleted file mode 100644
index 7ee303f..0000000
--- a/modules/audio_processing/voice_detection_impl.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
-#define MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
-
-#include <stddef.h>
-
-#include <memory>
-
-#include "modules/audio_processing/include/audio_processing.h"
-#include "rtc_base/constructor_magic.h"
-#include "rtc_base/critical_section.h"
-#include "rtc_base/thread_annotations.h"
-
-namespace webrtc {
-
-class AudioBuffer;
-
-class VoiceDetectionImpl : public VoiceDetection {
- public:
- explicit VoiceDetectionImpl(rtc::CriticalSection* crit);
- ~VoiceDetectionImpl() override;
-
- // TODO(peah): Fold into ctor, once public API is removed.
- void Initialize(int sample_rate_hz);
-
- // Returns the VAD activity.
- bool ProcessCaptureAudio(AudioBuffer* audio);
-
- bool using_external_vad() const {
- rtc::CritScope cs(crit_);
- return using_external_vad_;
- }
-
- // VoiceDetection implementation.
- int Enable(bool enable) override;
- bool is_enabled() const override;
- int set_stream_has_voice(bool has_voice) override;
- bool stream_has_voice() const override;
- int set_likelihood(Likelihood likelihood) override;
- Likelihood likelihood() const override;
- int set_frame_size_ms(int size) override;
- int frame_size_ms() const override;
-
- private:
- class Vad;
-
- rtc::CriticalSection* const crit_;
- bool enabled_ RTC_GUARDED_BY(crit_) = false;
- bool stream_has_voice_ RTC_GUARDED_BY(crit_) = false;
- bool using_external_vad_ RTC_GUARDED_BY(crit_) = false;
- Likelihood likelihood_ RTC_GUARDED_BY(crit_) = kLowLikelihood;
- int frame_size_ms_ RTC_GUARDED_BY(crit_) = 10;
- size_t frame_size_samples_ RTC_GUARDED_BY(crit_) = 0;
- int sample_rate_hz_ RTC_GUARDED_BY(crit_) = 0;
- std::unique_ptr<Vad> vad_ RTC_GUARDED_BY(crit_);
- RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(VoiceDetectionImpl);
-};
-} // namespace webrtc
-
-#endif // MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
diff --git a/modules/audio_processing/voice_detection_unittest.cc b/modules/audio_processing/voice_detection_unittest.cc
index 52332f2..9a52fa6 100644
--- a/modules/audio_processing/voice_detection_unittest.cc
+++ b/modules/audio_processing/voice_detection_unittest.cc
@@ -13,7 +13,7 @@
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/test/audio_buffer_tools.h"
#include "modules/audio_processing/test/bitexactness_tools.h"
-#include "modules/audio_processing/voice_detection_impl.h"
+#include "modules/audio_processing/voice_detection.h"
#include "test/gtest.h"
namespace webrtc {
@@ -22,27 +22,24 @@
const int kNumFramesToProcess = 1000;
// Process one frame of data and produce the output.
-void ProcessOneFrame(int sample_rate_hz,
+bool ProcessOneFrame(int sample_rate_hz,
AudioBuffer* audio_buffer,
- VoiceDetectionImpl* voice_detection) {
+ VoiceDetection* voice_detection) {
if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
audio_buffer->SplitIntoFrequencyBands();
}
- voice_detection->ProcessCaptureAudio(audio_buffer);
+ return voice_detection->ProcessCaptureAudio(audio_buffer);
}
// Processes a specified amount of frames, verifies the results and reports
// any errors.
void RunBitexactnessTest(int sample_rate_hz,
size_t num_channels,
- int frame_size_ms_reference,
- bool stream_has_voice_reference,
- VoiceDetection::Likelihood likelihood_reference) {
- rtc::CriticalSection crit_capture;
- VoiceDetectionImpl voice_detection(&crit_capture);
- voice_detection.Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz);
- voice_detection.Enable(true);
+ bool stream_has_voice_reference) {
+ int sample_rate_to_use = std::min(sample_rate_hz, 16000);
+ VoiceDetection voice_detection(sample_rate_to_use,
+ VoiceDetection::kLowLikelihood);
int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
const StreamConfig capture_config(sample_rate_hz, num_channels, false);
@@ -53,6 +50,7 @@
test::InputAudioFile capture_file(
test::GetApmCaptureTestVectorFileName(sample_rate_hz));
std::vector<float> capture_input(samples_per_channel * num_channels);
+ bool stream_has_voice = false;
for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
&capture_file, capture_input);
@@ -60,64 +58,47 @@
test::CopyVectorToAudioBuffer(capture_config, capture_input,
&capture_buffer);
- ProcessOneFrame(sample_rate_hz, &capture_buffer, &voice_detection);
+ stream_has_voice =
+ ProcessOneFrame(sample_rate_hz, &capture_buffer, &voice_detection);
}
- int frame_size_ms = voice_detection.frame_size_ms();
- bool stream_has_voice = voice_detection.stream_has_voice();
- VoiceDetection::Likelihood likelihood = voice_detection.likelihood();
-
- // Compare the outputs to the references.
- EXPECT_EQ(frame_size_ms_reference, frame_size_ms);
EXPECT_EQ(stream_has_voice_reference, stream_has_voice);
- EXPECT_EQ(likelihood_reference, likelihood);
}
-const int kFrameSizeMsReference = 10;
const bool kStreamHasVoiceReference = true;
-const VoiceDetection::Likelihood kLikelihoodReference =
- VoiceDetection::kLowLikelihood;
} // namespace
TEST(VoiceDetectionBitExactnessTest, Mono8kHz) {
- RunBitexactnessTest(8000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(8000, 1, kStreamHasVoiceReference);
}
TEST(VoiceDetectionBitExactnessTest, Mono16kHz) {
- RunBitexactnessTest(16000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(16000, 1, kStreamHasVoiceReference);
}
TEST(VoiceDetectionBitExactnessTest, Mono32kHz) {
- RunBitexactnessTest(32000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(32000, 1, kStreamHasVoiceReference);
}
TEST(VoiceDetectionBitExactnessTest, Mono48kHz) {
- RunBitexactnessTest(48000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(48000, 1, kStreamHasVoiceReference);
}
TEST(VoiceDetectionBitExactnessTest, Stereo8kHz) {
- RunBitexactnessTest(8000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(8000, 2, kStreamHasVoiceReference);
}
TEST(VoiceDetectionBitExactnessTest, Stereo16kHz) {
- RunBitexactnessTest(16000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(16000, 2, kStreamHasVoiceReference);
}
TEST(VoiceDetectionBitExactnessTest, Stereo32kHz) {
- RunBitexactnessTest(32000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(32000, 2, kStreamHasVoiceReference);
}
TEST(VoiceDetectionBitExactnessTest, Stereo48kHz) {
- RunBitexactnessTest(48000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
- kLikelihoodReference);
+ RunBitexactnessTest(48000, 2, kStreamHasVoiceReference);
}
} // namespace webrtc
diff --git a/resources/audio_processing/output_data_fixed.pb.sha1 b/resources/audio_processing/output_data_fixed.pb.sha1
index 0eb2da5..ea3d979 100644
--- a/resources/audio_processing/output_data_fixed.pb.sha1
+++ b/resources/audio_processing/output_data_fixed.pb.sha1
@@ -1 +1 @@
-e540fa8940b41d0cda26cdef937be3a455a04be7
\ No newline at end of file
+e9569d846d21e027bfdcae76a40146bc10d49d54
\ No newline at end of file
diff --git a/resources/audio_processing/output_data_float.pb.sha1 b/resources/audio_processing/output_data_float.pb.sha1
index 624d609..b5d123d 100644
--- a/resources/audio_processing/output_data_float.pb.sha1
+++ b/resources/audio_processing/output_data_float.pb.sha1
@@ -1 +1 @@
-2811f534082857ac9b9447a3e53028ef11851052
\ No newline at end of file
+53dd63154cc2694a3425596d9a8300fa2c66215d
\ No newline at end of file
diff --git a/resources/audio_processing/output_data_mac.pb.sha1 b/resources/audio_processing/output_data_mac.pb.sha1
index 0ebfdd2..26e1a52 100644
--- a/resources/audio_processing/output_data_mac.pb.sha1
+++ b/resources/audio_processing/output_data_mac.pb.sha1
@@ -1 +1 @@
-cc82c345f1e7ef17b12c2da41a0a9f73b09ca8f6
\ No newline at end of file
+2b31852bbce2b0b19ee36c47b18352e035cb08c5
\ No newline at end of file
diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc
index 87d017c..0dee80e 100644
--- a/test/fuzzers/audio_processing_configs_fuzzer.cc
+++ b/test/fuzzers/audio_processing_configs_fuzzer.cc
@@ -145,7 +145,6 @@
apm->ApplyConfig(apm_config);
apm->level_estimator()->Enable(use_le);
- apm->voice_detection()->Enable(use_vad);
return apm;
}