Stop using the beamformer inside APM
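Removes all use of an injected or config-enabled beamformer in APM, and marks
the related API (AudioProcessingBuilder::SetNonlinearBeamformer and the
kBeamforming config identifier) as deprecated.
Beamformer initialization and processing calls are removed, and every
beamformer enabled/disabled check is replaced by the assumption that
beamforming is off. Additionally, the AGC test
AgcOnlyAdaptsWhenTargetSignalIsPresent, which relied on the beamformer as a
VAD, is removed.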
Bug: webrtc:9402
Change-Id: I0d3d0b9773da083ce43c28045db9a77278f59f95
Reviewed-on: https://webrtc-review.googlesource.com/83341
Reviewed-by: Minyue Li <minyue@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23643}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 0443c11..71fefe4 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -166,7 +166,6 @@
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
bool intelligibility_enhancer_enabled,
- bool beamformer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@@ -184,7 +183,6 @@
changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
changed |=
(intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
- changed |= (beamformer_enabled != beamformer_enabled_);
changed |=
(adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
changed |=
@@ -202,7 +200,6 @@
residual_echo_detector_enabled_ = residual_echo_detector_enabled;
noise_suppressor_enabled_ = noise_suppressor_enabled;
intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
- beamformer_enabled_ = beamformer_enabled;
adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
gain_controller2_enabled_ = gain_controller2_enabled;
pre_amplifier_enabled_ = pre_amplifier_enabled;
@@ -231,8 +228,7 @@
const {
return low_cut_filter_enabled_ || echo_canceller_enabled_ ||
mobile_echo_controller_enabled_ || noise_suppressor_enabled_ ||
- beamformer_enabled_ || adaptive_gain_controller_enabled_ ||
- echo_controller_enabled_;
+ adaptive_gain_controller_enabled_ || echo_controller_enabled_;
}
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive()
@@ -388,14 +384,11 @@
config.Get<ExperimentalAgc>().enabled),
#endif
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
- capture_(false,
+ capture_(false),
#else
- capture_(config.Get<ExperimentalNs>().enabled,
+ capture_(config.Get<ExperimentalNs>().enabled),
#endif
- config.Get<Beamforming>().array_geometry,
- config.Get<Beamforming>().target_direction),
- capture_nonlocked_(config.Get<Beamforming>().enabled,
- config.Get<Intelligibility>().enabled) {
+ capture_nonlocked_(config.Get<Intelligibility>().enabled) {
{
rtc::CritScope cs_render(&crit_render_);
rtc::CritScope cs_capture(&crit_capture_);
@@ -509,11 +502,6 @@
int AudioProcessingImpl::InitializeLocked() {
UpdateActiveSubmoduleStates();
- const int capture_audiobuffer_num_channels =
- capture_nonlocked_.beamformer_enabled
- ? formats_.api_format.input_stream().num_channels()
- : formats_.api_format.output_stream().num_channels();
-
const int render_audiobuffer_num_output_frames =
formats_.api_format.reverse_output_stream().num_frames() == 0
? formats_.render_processing_format.num_frames()
@@ -544,7 +532,7 @@
new AudioBuffer(formats_.api_format.input_stream().num_frames(),
formats_.api_format.input_stream().num_channels(),
capture_nonlocked_.capture_processing_format.num_frames(),
- capture_audiobuffer_num_channels,
+ formats_.api_format.output_stream().num_channels(),
formats_.api_format.output_stream().num_frames()));
public_submodules_->echo_cancellation->Initialize(
@@ -575,7 +563,6 @@
public_submodules_->gain_control_for_experimental_agc->Initialize();
}
InitializeTransient();
- InitializeBeamformer();
#if WEBRTC_INTELLIGIBILITY_ENHANCER
InitializeIntelligibility();
#endif
@@ -615,11 +602,6 @@
return kBadNumberChannelsError;
}
- if (capture_nonlocked_.beamformer_enabled &&
- num_in_channels != capture_.array_geometry.size()) {
- return kBadNumberChannelsError;
- }
-
formats_.api_format = config;
int capture_processing_rate = FindNativeProcessRateToUse(
@@ -735,18 +717,6 @@
InitializeIntelligibility();
}
#endif
-
-#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
- if (capture_nonlocked_.beamformer_enabled !=
- config.Get<Beamforming>().enabled) {
- capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;
- if (config.Get<Beamforming>().array_geometry.size() > 1) {
- capture_.array_geometry = config.Get<Beamforming>().array_geometry;
- }
- capture_.target_direction = config.Get<Beamforming>().target_direction;
- InitializeBeamformer();
- }
-#endif // WEBRTC_ANDROID_PLATFORM_BUILD
}
int AudioProcessingImpl::proc_sample_rate_hz() const {
@@ -771,10 +741,7 @@
size_t AudioProcessingImpl::num_proc_channels() const {
// Used as callback from submodules, hence locking is not allowed.
- return (capture_nonlocked_.beamformer_enabled ||
- capture_nonlocked_.echo_controller_enabled)
- ? 1
- : num_output_channels();
+ return capture_nonlocked_.echo_controller_enabled ? 1 : num_output_channels();
}
size_t AudioProcessingImpl::num_output_channels() const {
@@ -1265,13 +1232,6 @@
capture_buffer->set_num_channels(1);
}
- if (capture_nonlocked_.beamformer_enabled) {
- private_submodules_->beamformer->AnalyzeChunk(
- *capture_buffer->split_data_f());
- // Discards all channels by the leftmost one.
- capture_buffer->set_num_channels(1);
- }
-
// TODO(peah): Move the AEC3 low-cut filter to this place.
if (private_submodules_->low_cut_filter &&
!private_submodules_->echo_controller) {
@@ -1334,16 +1294,10 @@
capture_buffer, stream_delay_ms()));
}
- if (capture_nonlocked_.beamformer_enabled) {
- private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f());
- }
-
public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer);
if (constants_.use_experimental_agc &&
- public_submodules_->gain_control->is_enabled() &&
- (!capture_nonlocked_.beamformer_enabled ||
- private_submodules_->beamformer->is_target_present())) {
+ public_submodules_->gain_control->is_enabled()) {
private_submodules_->agc_manager->Process(
capture_buffer->split_bands_const(0)[kBand0To8kHz],
capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate);
@@ -1811,7 +1765,6 @@
config_.residual_echo_detector.enabled,
public_submodules_->noise_suppression->is_enabled(),
capture_nonlocked_.intelligibility_enabled,
- capture_nonlocked_.beamformer_enabled,
public_submodules_->gain_control->is_enabled(),
config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
capture_nonlocked_.echo_controller_enabled,
@@ -1832,17 +1785,6 @@
}
}
-void AudioProcessingImpl::InitializeBeamformer() {
- if (capture_nonlocked_.beamformer_enabled) {
- if (!private_submodules_->beamformer) {
- private_submodules_->beamformer.reset(new NonlinearBeamformer(
- capture_.array_geometry, 1u, capture_.target_direction));
- }
- private_submodules_->beamformer->Initialize(kChunkSizeMs,
- capture_nonlocked_.split_rate);
- }
-}
-
void AudioProcessingImpl::InitializeIntelligibility() {
#if WEBRTC_INTELLIGIBILITY_ENHANCER
if (capture_nonlocked_.intelligibility_enabled) {
@@ -2102,9 +2044,7 @@
}
AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
- bool transient_suppressor_enabled,
- const std::vector<Point>& array_geometry,
- SphericalPointf target_direction)
+ bool transient_suppressor_enabled)
: aec_system_delay_jumps(-1),
delay_offset_ms(0),
was_stream_delay_set(false),
@@ -2114,8 +2054,6 @@
output_will_be_muted(false),
key_pressed(false),
transient_suppressor_enabled(transient_suppressor_enabled),
- array_geometry(array_geometry),
- target_direction(target_direction),
capture_processing_format(kSampleRate16kHz),
split_rate(kSampleRate16kHz),
echo_path_gain_change(false) {}
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 7974982..ff6448f 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -185,7 +185,6 @@
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
bool intelligibility_enhancer_enabled,
- bool beamformer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@@ -209,7 +208,6 @@
bool residual_echo_detector_enabled_ = false;
bool noise_suppressor_enabled_ = false;
bool intelligibility_enhancer_enabled_ = false;
- bool beamformer_enabled_ = false;
bool adaptive_gain_controller_enabled_ = false;
bool gain_controller2_enabled_ = false;
bool pre_amplifier_enabled_ = false;
@@ -370,9 +368,7 @@
} constants_;
struct ApmCaptureState {
- ApmCaptureState(bool transient_suppressor_enabled,
- const std::vector<Point>& array_geometry,
- SphericalPointf target_direction);
+ ApmCaptureState(bool transient_suppressor_enabled);
~ApmCaptureState();
int aec_system_delay_jumps;
int delay_offset_ms;
@@ -383,8 +379,6 @@
bool output_will_be_muted;
bool key_pressed;
bool transient_suppressor_enabled;
- std::vector<Point> array_geometry;
- SphericalPointf target_direction;
std::unique_ptr<AudioBuffer> capture_audio;
// Only the rate and samples fields of capture_processing_format_ are used
// because the capture processing number of channels is mutable and is
@@ -395,12 +389,10 @@
} capture_ RTC_GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {
- ApmCaptureNonLockedState(bool beamformer_enabled,
- bool intelligibility_enabled)
+ ApmCaptureNonLockedState(bool intelligibility_enabled)
: capture_processing_format(kSampleRate16kHz),
split_rate(kSampleRate16kHz),
stream_delay_ms(0),
- beamformer_enabled(beamformer_enabled),
intelligibility_enabled(intelligibility_enabled) {}
// Only the rate and samples fields of capture_processing_format_ are used
// because the forward processing number of channels is mutable and is
@@ -408,7 +400,6 @@
StreamConfig capture_processing_format;
int split_rate;
int stream_delay_ms;
- bool beamformer_enabled;
bool intelligibility_enabled;
bool echo_controller_enabled = false;
} capture_nonlocked_;
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index efbe3c8..0954190 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -1300,95 +1300,6 @@
}
}
-#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
-TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
- const int kSampleRateHz = 16000;
- const size_t kSamplesPerChannel =
- static_cast<size_t>(AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000);
- const size_t kNumInputChannels = 2;
- const size_t kNumOutputChannels = 1;
- const size_t kNumChunks = 700;
- const float kScaleFactor = 0.25f;
- Config config;
- std::vector<webrtc::Point> geometry;
- geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
- geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
- config.Set<Beamforming>(new Beamforming(true, geometry));
- testing::NiceMock<MockNonlinearBeamformer>* beamformer =
- new testing::NiceMock<MockNonlinearBeamformer>(geometry, 1u);
- std::unique_ptr<AudioProcessing> apm(
- AudioProcessingBuilder()
- .SetNonlinearBeamformer(
- std::unique_ptr<webrtc::NonlinearBeamformer>(beamformer))
- .Create(config));
- EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
- ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
- ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
- const size_t max_length = kSamplesPerChannel * std::max(kNumInputChannels,
- kNumOutputChannels);
- std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]);
- std::unique_ptr<float[]> float_data(new float[max_length]);
- std::string filename = ResourceFilePath("far", kSampleRateHz);
- FILE* far_file = fopen(filename.c_str(), "rb");
- ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
- const int kDefaultVolume = apm->gain_control()->stream_analog_level();
- const int kDefaultCompressionGain =
- apm->gain_control()->compression_gain_db();
- bool is_target = false;
- EXPECT_CALL(*beamformer, is_target_present())
- .WillRepeatedly(testing::ReturnPointee(&is_target));
- for (size_t i = 0; i < kNumChunks; ++i) {
- ASSERT_TRUE(ReadChunk(far_file,
- int_data.get(),
- float_data.get(),
- &src_buf));
- for (size_t j = 0; j < kNumInputChannels; ++j) {
- for (size_t k = 0; k < kSamplesPerChannel; ++k) {
- src_buf.channels()[j][k] *= kScaleFactor;
- }
- }
- EXPECT_EQ(kNoErr,
- apm->ProcessStream(src_buf.channels(),
- src_buf.num_frames(),
- kSampleRateHz,
- LayoutFromChannels(src_buf.num_channels()),
- kSampleRateHz,
- LayoutFromChannels(dest_buf.num_channels()),
- dest_buf.channels()));
- }
- EXPECT_EQ(kDefaultVolume,
- apm->gain_control()->stream_analog_level());
- EXPECT_EQ(kDefaultCompressionGain,
- apm->gain_control()->compression_gain_db());
- rewind(far_file);
- is_target = true;
- for (size_t i = 0; i < kNumChunks; ++i) {
- ASSERT_TRUE(ReadChunk(far_file,
- int_data.get(),
- float_data.get(),
- &src_buf));
- for (size_t j = 0; j < kNumInputChannels; ++j) {
- for (size_t k = 0; k < kSamplesPerChannel; ++k) {
- src_buf.channels()[j][k] *= kScaleFactor;
- }
- }
- EXPECT_EQ(kNoErr,
- apm->ProcessStream(src_buf.channels(),
- src_buf.num_frames(),
- kSampleRateHz,
- LayoutFromChannels(src_buf.num_channels()),
- kSampleRateHz,
- LayoutFromChannels(dest_buf.num_channels()),
- dest_buf.channels()));
- }
- EXPECT_LT(kDefaultVolume,
- apm->gain_control()->stream_analog_level());
- EXPECT_LT(kDefaultCompressionGain,
- apm->gain_control()->compression_gain_db());
- ASSERT_EQ(0, fclose(far_file));
-}
-#endif
-
TEST_F(ApmTest, NoiseSuppression) {
// Test valid suppression levels.
NoiseSuppression::Level level[] = {
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index ee419df..e4fb9b2 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -674,6 +674,7 @@
AudioProcessingBuilder& SetRenderPreProcessing(
std::unique_ptr<CustomProcessing> render_pre_processing);
// The AudioProcessingBuilder takes ownership of the nonlinear beamformer.
+ RTC_DEPRECATED
AudioProcessingBuilder& SetNonlinearBeamformer(
std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer);
// The AudioProcessingBuilder takes ownership of the echo_detector.
diff --git a/modules/audio_processing/include/config.h b/modules/audio_processing/include/config.h
index 338fcea..4e318c9 100644
--- a/modules/audio_processing/include/config.h
+++ b/modules/audio_processing/include/config.h
@@ -30,7 +30,7 @@
kDelayAgnostic,
kExperimentalAgc,
kExperimentalNs,
- kBeamforming,
+ kBeamforming, // Deprecated
kIntelligibility,
kEchoCanceller3, // Deprecated
kAecRefinedAdaptiveFilter,