Removing the intelligibility enhancer.
The intelligibility enhancer is always disabled and it is the only non-test
target using the lapped transform in common_audio (which we planned to remove).
Bug: webrtc:9689, webrtc:5298
Change-Id: Ida65d3aa11ac366471e7e5cbc053108b376c67d8
Reviewed-on: https://webrtc-review.googlesource.com/96460
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24504}
diff --git a/api/audio_options.h b/api/audio_options.h
index df66d36..aefc7a1 100644
--- a/api/audio_options.h
+++ b/api/audio_options.h
@@ -44,7 +44,6 @@
SetFrom(&extended_filter_aec, change.extended_filter_aec);
SetFrom(&delay_agnostic_aec, change.delay_agnostic_aec);
SetFrom(&experimental_ns, change.experimental_ns);
- SetFrom(&intelligibility_enhancer, change.intelligibility_enhancer);
SetFrom(&residual_echo_detector, change.residual_echo_detector);
SetFrom(&tx_agc_target_dbov, change.tx_agc_target_dbov);
SetFrom(&tx_agc_digital_compression_gain,
@@ -74,7 +73,6 @@
extended_filter_aec == o.extended_filter_aec &&
delay_agnostic_aec == o.delay_agnostic_aec &&
experimental_ns == o.experimental_ns &&
- intelligibility_enhancer == o.intelligibility_enhancer &&
residual_echo_detector == o.residual_echo_detector &&
tx_agc_target_dbov == o.tx_agc_target_dbov &&
tx_agc_digital_compression_gain ==
@@ -108,7 +106,6 @@
ost << ToStringIfSet("extended_filter_aec", extended_filter_aec);
ost << ToStringIfSet("delay_agnostic_aec", delay_agnostic_aec);
ost << ToStringIfSet("experimental_ns", experimental_ns);
- ost << ToStringIfSet("intelligibility_enhancer", intelligibility_enhancer);
ost << ToStringIfSet("residual_echo_detector", residual_echo_detector);
ost << ToStringIfSet("tx_agc_target_dbov", tx_agc_target_dbov);
ost << ToStringIfSet("tx_agc_digital_compression_gain",
@@ -153,7 +150,6 @@
absl::optional<bool> extended_filter_aec;
absl::optional<bool> delay_agnostic_aec;
absl::optional<bool> experimental_ns;
- absl::optional<bool> intelligibility_enhancer;
// Note that tx_agc_* only applies to non-experimental AGC.
absl::optional<bool> residual_echo_detector;
absl::optional<uint16_t> tx_agc_target_dbov;
diff --git a/api/mediaconstraintsinterface.cc b/api/mediaconstraintsinterface.cc
index 80c447d..5567786 100644
--- a/api/mediaconstraintsinterface.cc
+++ b/api/mediaconstraintsinterface.cc
@@ -104,8 +104,6 @@
"googNoiseSuppression";
const char MediaConstraintsInterface::kExperimentalNoiseSuppression[] =
"googNoiseSuppression2";
-const char MediaConstraintsInterface::kIntelligibilityEnhancer[] =
- "intelligibilityEnhancer";
const char MediaConstraintsInterface::kHighpassFilter[] = "googHighpassFilter";
const char MediaConstraintsInterface::kTypingNoiseDetection[] =
"googTypingNoiseDetection";
@@ -241,9 +239,6 @@
ConstraintToOptional<bool>(
constraints, MediaConstraintsInterface::kExperimentalNoiseSuppression,
&options->experimental_ns);
- ConstraintToOptional<bool>(
- constraints, MediaConstraintsInterface::kIntelligibilityEnhancer,
- &options->intelligibility_enhancer);
ConstraintToOptional<bool>(constraints,
MediaConstraintsInterface::kHighpassFilter,
&options->highpass_filter);
diff --git a/api/mediaconstraintsinterface.h b/api/mediaconstraintsinterface.h
index 6128e6a..c6a914a 100644
--- a/api/mediaconstraintsinterface.h
+++ b/api/mediaconstraintsinterface.h
@@ -73,7 +73,6 @@
static const char kExperimentalAutoGainControl[]; // googAutoGainControl2
static const char kNoiseSuppression[]; // googNoiseSuppression
static const char kExperimentalNoiseSuppression[]; // googNoiseSuppression2
- static const char kIntelligibilityEnhancer[]; // intelligibilityEnhancer
static const char kHighpassFilter[]; // googHighpassFilter
static const char kTypingNoiseDetection[]; // googTypingNoiseDetection
static const char kAudioMirroring[]; // googAudioMirroring
diff --git a/media/BUILD.gn b/media/BUILD.gn
index 94c85e9..241856f 100644
--- a/media/BUILD.gn
+++ b/media/BUILD.gn
@@ -273,12 +273,6 @@
suppressed_configs += [ "//build/config/clang:find_bad_constructs" ]
}
- if (rtc_enable_intelligibility_enhancer) {
- defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
- } else {
- defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
- }
-
if (rtc_opus_support_120ms_ptime) {
defines += [ "WEBRTC_OPUS_SUPPORT_120MS_PTIME=1" ]
} else {
diff --git a/media/engine/webrtcvoiceengine.cc b/media/engine/webrtcvoiceengine.cc
index f8aa593..b981a20 100644
--- a/media/engine/webrtcvoiceengine.cc
+++ b/media/engine/webrtcvoiceengine.cc
@@ -53,14 +53,6 @@
constexpr int kNackRtpHistoryMs = 5000;
-// Check to verify that the define for the intelligibility enhancer is properly
-// set.
-#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
- (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
- WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
-#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
-#endif
-
// For SendSideBwe, Opus bitrate should be in the range between 6000 and 32000.
const int kOpusMinBitrateBps = 6000;
const int kOpusBitrateFbBps = 32000;
@@ -296,7 +288,6 @@
options.extended_filter_aec = false;
options.delay_agnostic_aec = false;
options.experimental_ns = false;
- options.intelligibility_enhancer = false;
options.residual_echo_detector = true;
bool error = ApplyOptions(options);
RTC_DCHECK(error);
@@ -410,11 +401,6 @@
}
#endif
-#if (WEBRTC_INTELLIGIBILITY_ENHANCER == 0)
- // Hardcode the intelligibility enhancer to be off.
- options.intelligibility_enhancer = false;
-#endif
-
if (options.echo_cancellation) {
// Check if platform supports built-in EC. Currently only supported on
// Android and in combination with Java based audio layer.
@@ -479,19 +465,9 @@
webrtc::apm_helpers::SetAgcConfig(apm(), default_agc_config_);
}
- if (options.intelligibility_enhancer) {
- intelligibility_enhancer_ = options.intelligibility_enhancer;
- }
- if (intelligibility_enhancer_ && *intelligibility_enhancer_) {
- RTC_LOG(LS_INFO) << "Enabling NS when Intelligibility Enhancer is active.";
- options.noise_suppression = intelligibility_enhancer_;
- }
-
if (options.noise_suppression) {
if (adm()->BuiltInNSIsAvailable()) {
- bool builtin_ns =
- *options.noise_suppression &&
- !(intelligibility_enhancer_ && *intelligibility_enhancer_);
+ bool builtin_ns = *options.noise_suppression;
if (adm()->EnableBuiltInNS(builtin_ns) == 0 && builtin_ns) {
// Disable internal software NS if built-in NS is enabled,
// i.e., replace the software NS with the built-in NS.
@@ -558,13 +534,6 @@
new webrtc::ExperimentalNs(*experimental_ns_));
}
- if (intelligibility_enhancer_) {
- RTC_LOG(LS_INFO) << "Intelligibility Enhancer is enabled? "
- << *intelligibility_enhancer_;
- config.Set<webrtc::Intelligibility>(
- new webrtc::Intelligibility(*intelligibility_enhancer_));
- }
-
webrtc::AudioProcessing::Config apm_config = apm()->GetConfig();
if (options.highpass_filter) {
diff --git a/media/engine/webrtcvoiceengine.h b/media/engine/webrtcvoiceengine.h
index cd0c55c..91e40c8 100644
--- a/media/engine/webrtcvoiceengine.h
+++ b/media/engine/webrtcvoiceengine.h
@@ -120,15 +120,13 @@
bool initialized_ = false;
webrtc::AgcConfig default_agc_config_;
- // Cache received extended_filter_aec, delay_agnostic_aec, experimental_ns
- // and intelligibility_enhancer values, and apply them
- // in case they are missing in the audio options. We need to do this because
- // SetExtraOptions() will revert to defaults for options which are not
- // provided.
+ // Cache received extended_filter_aec, delay_agnostic_aec and experimental_ns
+ // values, and apply them in case they are missing in the audio options.
+ // We need to do this because SetExtraOptions() will revert to defaults for
+ // options which are not provided.
absl::optional<bool> extended_filter_aec_;
absl::optional<bool> delay_agnostic_aec_;
absl::optional<bool> experimental_ns_;
- absl::optional<bool> intelligibility_enhancer_;
// Jitter buffer settings for new streams.
size_t audio_jitter_buffer_max_packets_ = 50;
bool audio_jitter_buffer_fast_accelerate_ = false;
diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn
index 9e71446..052bb47 100644
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@@ -138,18 +138,6 @@
defines += [ "WEBRTC_UNTRUSTED_DELAY" ]
}
- if (rtc_enable_intelligibility_enhancer) {
- defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
- sources += [
- "intelligibility/intelligibility_enhancer.cc",
- "intelligibility/intelligibility_enhancer.h",
- "intelligibility/intelligibility_utils.cc",
- "intelligibility/intelligibility_utils.h",
- ]
- } else {
- defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
- }
-
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_NS_FIXED" ]
} else {
@@ -337,10 +325,6 @@
":transient_suppression_test",
]
- if (rtc_enable_intelligibility_enhancer) {
- deps += [ ":intelligibility_proc" ]
- }
-
if (rtc_enable_protobuf) {
deps += [
":audioproc_f",
@@ -422,16 +406,6 @@
defines = []
- if (rtc_enable_intelligibility_enhancer) {
- defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
- sources += [
- "intelligibility/intelligibility_enhancer_unittest.cc",
- "intelligibility/intelligibility_utils_unittest.cc",
- ]
- } else {
- defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
- }
-
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ]
} else {
@@ -500,12 +474,6 @@
"../../test:perf_test",
"../../test:test_support",
]
-
- if (rtc_enable_intelligibility_enhancer) {
- defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ]
- } else {
- defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ]
- }
}
rtc_source_set("file_audio_generator_unittests") {
@@ -666,24 +634,6 @@
]
}
- if (rtc_enable_intelligibility_enhancer) {
- rtc_executable("intelligibility_proc") {
- testonly = true
- sources = [
- "intelligibility/test/intelligibility_proc.cc",
- ]
- deps = [
- ":audio_processing",
- ":audioproc_test_utils",
- "../../common_audio",
- "../../rtc_base:rtc_base_approved",
- "../../system_wrappers:metrics_default",
- "../../test:test_support",
- "//testing/gtest",
- ]
- }
- }
-
if (rtc_enable_protobuf) {
proto_library("audioproc_unittest_proto") {
sources = [
diff --git a/modules/audio_processing/aec_dump/aec_dump_impl.cc b/modules/audio_processing/aec_dump/aec_dump_impl.cc
index d94822d..9e07367 100644
--- a/modules/audio_processing/aec_dump/aec_dump_impl.cc
+++ b/modules/audio_processing/aec_dump/aec_dump_impl.cc
@@ -45,8 +45,6 @@
pb_cfg->set_transient_suppression_enabled(
config.transient_suppression_enabled);
- pb_cfg->set_intelligibility_enhancer_enabled(
- config.intelligibility_enhancer_enabled);
pb_cfg->set_pre_amplifier_enabled(config.pre_amplifier_enabled);
pb_cfg->set_pre_amplifier_fixed_gain_factor(
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 4a1a86c..8848b73 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -38,9 +38,6 @@
#include "rtc_base/system/arch.h"
#include "rtc_base/timeutils.h"
#include "rtc_base/trace_event.h"
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
-#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
-#endif
#include "modules/audio_processing/level_estimator_impl.h"
#include "modules/audio_processing/low_cut_filter.h"
#include "modules/audio_processing/noise_suppression_impl.h"
@@ -50,14 +47,6 @@
#include "rtc_base/atomicops.h"
#include "system_wrappers/include/metrics.h"
-// Check to verify that the define for the intelligibility enhancer is properly
-// set.
-#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
- (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
- WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
-#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
-#endif
-
#define RETURN_ON_ERR(expr) \
do { \
int err = (expr); \
@@ -170,7 +159,6 @@
bool mobile_echo_controller_enabled,
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
- bool intelligibility_enhancer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@@ -187,8 +175,6 @@
(residual_echo_detector_enabled != residual_echo_detector_enabled_);
changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
changed |=
- (intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
- changed |=
(adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
changed |=
(gain_controller2_enabled != gain_controller2_enabled_);
@@ -204,7 +190,6 @@
mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
residual_echo_detector_enabled_ = residual_echo_detector_enabled;
noise_suppressor_enabled_ = noise_suppressor_enabled;
- intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
gain_controller2_enabled_ = gain_controller2_enabled;
pre_amplifier_enabled_ = pre_amplifier_enabled;
@@ -221,12 +206,7 @@
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive()
const {
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- return CaptureMultiBandProcessingActive() ||
- intelligibility_enhancer_enabled_ || voice_activity_detector_enabled_;
-#else
return CaptureMultiBandProcessingActive() || voice_activity_detector_enabled_;
-#endif
}
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
@@ -260,11 +240,7 @@
bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandProcessingActive()
const {
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- return intelligibility_enhancer_enabled_;
-#else
return false;
-#endif
}
struct AudioProcessingImpl::ApmPublicSubmodules {
@@ -283,9 +259,6 @@
// Accessed internally from both render and capture.
std::unique_ptr<TransientSuppressor> transient_suppressor;
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer;
-#endif
};
struct AudioProcessingImpl::ApmPrivateSubmodules {
@@ -405,7 +378,7 @@
#else
capture_(config.Get<ExperimentalNs>().enabled),
#endif
- capture_nonlocked_(config.Get<Intelligibility>().enabled) {
+ capture_nonlocked_() {
{
rtc::CritScope cs_render(&crit_render_);
rtc::CritScope cs_capture(&crit_capture_);
@@ -589,9 +562,6 @@
public_submodules_->gain_control_for_experimental_agc->Initialize();
}
InitializeTransient();
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- InitializeIntelligibility();
-#endif
InitializeLowCutFilter();
public_submodules_->noise_suppression->Initialize(num_proc_channels(),
proc_sample_rate_hz());
@@ -742,15 +712,6 @@
config.Get<ExperimentalNs>().enabled;
InitializeTransient();
}
-
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- if (capture_nonlocked_.intelligibility_enabled !=
- config.Get<Intelligibility>().enabled) {
- capture_nonlocked_.intelligibility_enabled =
- config.Get<Intelligibility>().enabled;
- InitializeIntelligibility();
- }
-#endif
}
int AudioProcessingImpl::proc_sample_rate_hz() const {
@@ -1306,18 +1267,6 @@
capture_buffer->CopyLowPassToReference();
}
public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- if (capture_nonlocked_.intelligibility_enabled) {
- RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());
- const int gain_db =
- public_submodules_->gain_control->is_enabled()
- ? public_submodules_->gain_control->compression_gain_db()
- : 0;
- const float gain = DbToRatio(gain_db);
- public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(
- public_submodules_->noise_suppression->NoiseEstimate(), gain);
- }
-#endif
// Ensure that the stream delay was set before the call to the
// AECM ProcessCaptureAudio function.
@@ -1540,13 +1489,6 @@
render_buffer->SplitIntoFrequencyBands();
}
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- if (capture_nonlocked_.intelligibility_enabled) {
- public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
- render_buffer);
- }
-#endif
-
if (submodule_states_.RenderMultiBandSubModulesActive()) {
QueueBandedRenderAudio(render_buffer);
}
@@ -1809,7 +1751,6 @@
public_submodules_->echo_control_mobile->is_enabled(),
config_.residual_echo_detector.enabled,
public_submodules_->noise_suppression->is_enabled(),
- capture_nonlocked_.intelligibility_enabled,
public_submodules_->gain_control->is_enabled(),
config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
capture_nonlocked_.echo_controller_enabled,
@@ -1830,18 +1771,6 @@
}
}
-void AudioProcessingImpl::InitializeIntelligibility() {
-#if WEBRTC_INTELLIGIBILITY_ENHANCER
- if (capture_nonlocked_.intelligibility_enabled) {
- public_submodules_->intelligibility_enhancer.reset(
- new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
- render_.render_audio->num_channels(),
- render_.render_audio->num_bands(),
- NoiseSuppressionImpl::num_noise_bins()));
- }
-#endif
-}
-
void AudioProcessingImpl::InitializeLowCutFilter() {
if (config_.high_pass_filter.enabled) {
private_submodules_->low_cut_filter.reset(
@@ -2029,8 +1958,6 @@
apm_config.transient_suppression_enabled =
capture_.transient_suppressor_enabled;
- apm_config.intelligibility_enhancer_enabled =
- capture_nonlocked_.intelligibility_enabled;
apm_config.experiments_description = experiments_description;
apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled;
apm_config.pre_amplifier_fixed_gain_factor =
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 06fa70c..a95e150 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -183,7 +183,6 @@
bool mobile_echo_controller_enabled,
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
- bool intelligibility_enhancer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@@ -208,7 +207,6 @@
bool mobile_echo_controller_enabled_ = false;
bool residual_echo_detector_enabled_ = false;
bool noise_suppressor_enabled_ = false;
- bool intelligibility_enhancer_enabled_ = false;
bool adaptive_gain_controller_enabled_ = false;
bool gain_controller2_enabled_ = false;
bool pre_amplifier_enabled_ = false;
@@ -245,8 +243,6 @@
// acquired.
void InitializeTransient()
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
- void InitializeIntelligibility()
- RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
int InitializeLocked(const ProcessingConfig& config)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
void InitializeResidualEchoDetector()
@@ -399,18 +395,16 @@
} capture_ RTC_GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {
- ApmCaptureNonLockedState(bool intelligibility_enabled)
+ ApmCaptureNonLockedState()
: capture_processing_format(kSampleRate16kHz),
split_rate(kSampleRate16kHz),
- stream_delay_ms(0),
- intelligibility_enabled(intelligibility_enabled) {}
+ stream_delay_ms(0) {}
// Only the rate and samples fields of capture_processing_format_ are used
// because the forward processing number of channels is mutable and is
// tracked by the capture_audio_.
StreamConfig capture_processing_format;
int split_rate;
int stream_delay_ms;
- bool intelligibility_enabled;
bool echo_controller_enabled = false;
} capture_nonlocked_;
diff --git a/modules/audio_processing/audio_processing_performance_unittest.cc b/modules/audio_processing/audio_processing_performance_unittest.cc
index 56615cb..df8d5fe 100644
--- a/modules/audio_processing/audio_processing_performance_unittest.cc
+++ b/modules/audio_processing/audio_processing_performance_unittest.cc
@@ -26,14 +26,6 @@
#include "test/gtest.h"
#include "test/testsupport/perf_test.h"
-// Check to verify that the define for the intelligibility enhancer is properly
-// set.
-#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
- (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \
- WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
-#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
-#endif
-
namespace webrtc {
namespace {
@@ -49,7 +41,6 @@
enum class SettingsType {
kDefaultApmDesktop,
kDefaultApmMobile,
- kDefaultApmDesktopAndIntelligibilityEnhancer,
kAllSubmodulesTurnedOff,
kDefaultApmDesktopWithoutDelayAgnostic,
kDefaultApmDesktopWithoutExtendedFilter
@@ -99,20 +90,6 @@
simulation_configs.push_back(SimulationConfig(sample_rate, settings));
}
}
-
-#if WEBRTC_INTELLIGIBILITY_ENHANCER == 1
- const SettingsType intelligibility_enhancer_settings[] = {
- SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer};
-
- const int intelligibility_enhancer_sample_rates[] = {8000, 16000, 32000,
- 48000};
-
- for (auto sample_rate : intelligibility_enhancer_sample_rates) {
- for (auto settings : intelligibility_enhancer_settings) {
- simulation_configs.push_back(SimulationConfig(sample_rate, settings));
- }
- }
-#endif
#endif
const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile};
@@ -137,9 +114,6 @@
case SettingsType::kDefaultApmDesktop:
description = "DefaultApmDesktop";
break;
- case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer:
- description = "DefaultApmDesktopAndIntelligibilityEnhancer";
- break;
case SettingsType::kAllSubmodulesTurnedOff:
description = "AllSubmodulesOff";
break;
@@ -538,16 +512,6 @@
apm_->SetExtraOptions(config);
break;
}
- case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: {
- Config config;
- config.Set<Intelligibility>(new Intelligibility(true));
- add_default_desktop_config(&config);
- apm_.reset(AudioProcessingBuilder().Create(config));
- ASSERT_TRUE(!!apm_);
- set_default_desktop_apm_runtime_settings(apm_.get());
- apm_->SetExtraOptions(config);
- break;
- }
case SettingsType::kAllSubmodulesTurnedOff: {
apm_.reset(AudioProcessingBuilder().Create());
ASSERT_TRUE(!!apm_);
diff --git a/modules/audio_processing/debug.proto b/modules/audio_processing/debug.proto
index ebfb84d..b19f7fe 100644
--- a/modules/audio_processing/debug.proto
+++ b/modules/audio_processing/debug.proto
@@ -47,7 +47,6 @@
// Contains the configurations of various APM component. A Config message is
// added when any of the fields are changed.
message Config {
- // Next field number 19.
// Acoustic echo canceler.
optional bool aec_enabled = 1;
optional bool aec_delay_agnostic_enabled = 2;
@@ -73,11 +72,12 @@
// Semicolon-separated string containing experimental feature
// descriptions.
optional string experiments_description = 17;
- // Intelligibility Enhancer.
- optional bool intelligibility_enhancer_enabled = 18;
+ reserved 18; // Intelligibility enhancer enabled (deprecated).
// Pre amplifier.
optional bool pre_amplifier_enabled = 19;
optional float pre_amplifier_fixed_gain_factor = 20;
+
+ // Next field number 21.
}
message Event {
diff --git a/modules/audio_processing/include/aec_dump.cc b/modules/audio_processing/include/aec_dump.cc
index c243b52..67809d0 100644
--- a/modules/audio_processing/include/aec_dump.cc
+++ b/modules/audio_processing/include/aec_dump.cc
@@ -32,8 +32,6 @@
hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled &&
ns_level == other.ns_level &&
transient_suppression_enabled == other.transient_suppression_enabled &&
- intelligibility_enhancer_enabled ==
- other.intelligibility_enhancer_enabled &&
noise_robust_agc_enabled == other.noise_robust_agc_enabled &&
pre_amplifier_enabled == other.pre_amplifier_enabled &&
pre_amplifier_fixed_gain_factor ==
diff --git a/modules/audio_processing/include/aec_dump.h b/modules/audio_processing/include/aec_dump.h
index 95c010b..e32fa67 100644
--- a/modules/audio_processing/include/aec_dump.h
+++ b/modules/audio_processing/include/aec_dump.h
@@ -49,7 +49,6 @@
bool ns_enabled = false;
int ns_level = 0;
bool transient_suppression_enabled = false;
- bool intelligibility_enhancer_enabled = false;
bool noise_robust_agc_enabled = false;
bool pre_amplifier_enabled = false;
float pre_amplifier_fixed_gain_factor = 1.f;
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index e194be7..f05d7b6 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -151,17 +151,6 @@
bool enabled;
};
-// Use to enable intelligibility enhancer in audio processing.
-//
-// Note: If enabled and the reverse stream has more than one output channel,
-// the reverse stream will become an upmixed mono signal.
-struct Intelligibility {
- Intelligibility() : enabled(false) {}
- explicit Intelligibility(bool enabled) : enabled(enabled) {}
- static const ConfigOptionID identifier = ConfigOptionID::kIntelligibility;
- bool enabled;
-};
-
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
diff --git a/modules/audio_processing/include/config.h b/modules/audio_processing/include/config.h
index 9232b2e..398aab6 100644
--- a/modules/audio_processing/include/config.h
+++ b/modules/audio_processing/include/config.h
@@ -30,9 +30,9 @@
kDelayAgnostic,
kExperimentalAgc,
kExperimentalNs,
- kBeamforming, // Deprecated
- kIntelligibility,
- kEchoCanceller3, // Deprecated
+ kBeamforming, // Deprecated
+ kIntelligibility, // Deprecated
+ kEchoCanceller3, // Deprecated
kAecRefinedAdaptiveFilter,
kLevelControl // Deprecated
};
diff --git a/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
deleted file mode 100644
index 0f7b118..0000000
--- a/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
-
-#include <math.h>
-#include <stdlib.h>
-#include <algorithm>
-#include <limits>
-#include <numeric>
-
-#include "common_audio/include/audio_util.h"
-#include "common_audio/window_generator.h"
-#include "rtc_base/checks.h"
-#include "rtc_base/logging.h"
-#include "rtc_base/numerics/safe_minmax.h"
-
-namespace webrtc {
-
-namespace {
-
-const size_t kErbResolution = 2;
-const int kWindowSizeMs = 16;
-const int kChunkSizeMs = 10; // Size provided by APM.
-const float kClipFreqKhz = 0.2f;
-const float kKbdAlpha = 1.5f;
-const float kLambdaBot = -1.f; // Extreme values in bisection
-const float kLambdaTop = -1e-5f; // search for lamda.
-const float kVoiceProbabilityThreshold = 0.5f;
-// Number of chunks after voice activity which is still considered speech.
-const size_t kSpeechOffsetDelay = 10;
-const float kDecayRate = 0.995f; // Power estimation decay rate.
-const float kMaxRelativeGainChange = 0.005f;
-const float kRho = 0.0004f; // Default production and interpretation SNR.
-const float kPowerNormalizationFactor = 1.f / (1 << 30);
-const float kMaxActiveSNR = 128.f; // 21dB
-const float kMinInactiveSNR = 32.f; // 15dB
-const size_t kGainUpdatePeriod = 10u;
-
-// Returns dot product of vectors |a| and |b| with size |length|.
-float DotProduct(const float* a, const float* b, size_t length) {
- float ret = 0.f;
- for (size_t i = 0; i < length; ++i) {
- ret += a[i] * b[i];
- }
- return ret;
-}
-
-// Computes the power across ERB bands from the power spectral density |pow|.
-// Stores it in |result|.
-void MapToErbBands(const float* pow,
- const std::vector<std::vector<float>>& filter_bank,
- float* result) {
- for (size_t i = 0; i < filter_bank.size(); ++i) {
- RTC_DCHECK_GT(filter_bank[i].size(), 0);
- result[i] = kPowerNormalizationFactor *
- DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());
- }
-}
-
-} // namespace
-
-IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
- size_t num_render_channels,
- size_t num_bands,
- size_t num_noise_bins)
- : freqs_(RealFourier::ComplexLength(
- RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
- num_noise_bins_(num_noise_bins),
- chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
- bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
- sample_rate_hz_(sample_rate_hz),
- num_render_channels_(num_render_channels),
- clear_power_estimator_(freqs_, kDecayRate),
- noise_power_estimator_(num_noise_bins, kDecayRate),
- filtered_clear_pow_(bank_size_, 0.f),
- filtered_noise_pow_(num_noise_bins, 0.f),
- center_freqs_(bank_size_),
- capture_filter_bank_(CreateErbBank(num_noise_bins)),
- render_filter_bank_(CreateErbBank(freqs_)),
- gains_eq_(bank_size_),
- gain_applier_(freqs_, kMaxRelativeGainChange),
- audio_s16_(chunk_length_),
- chunks_since_voice_(kSpeechOffsetDelay),
- is_speech_(false),
- snr_(kMaxActiveSNR),
- is_active_(false),
- num_chunks_(0u),
- num_active_chunks_(0u),
- noise_estimation_buffer_(num_noise_bins),
- noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
- std::vector<float>(num_noise_bins),
- RenderQueueItemVerifier<float>(num_noise_bins)) {
- RTC_DCHECK_LE(kRho, 1.f);
-
- const size_t erb_index = static_cast<size_t>(
- ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
- 43.f));
- start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
-
- size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);
- std::vector<float> kbd_window(window_size);
- WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
- kbd_window.data());
- render_mangler_.reset(new LappedTransform(
- num_render_channels_, num_render_channels_, chunk_length_,
- kbd_window.data(), window_size, window_size / 2, this));
-
- const size_t initial_delay = render_mangler_->initial_delay();
- for (size_t i = 0u; i < num_bands - 1; ++i) {
- high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>(
- new intelligibility::DelayBuffer(initial_delay, num_render_channels_)));
- }
-}
-
-IntelligibilityEnhancer::~IntelligibilityEnhancer() {
- // Don't rely on this log, since the destructor isn't called when the
- // app/tab is killed.
- if (num_chunks_ > 0) {
- RTC_LOG(LS_INFO) << "Intelligibility Enhancer was active for "
- << 100.f * static_cast<float>(num_active_chunks_) /
- num_chunks_
- << "% of the call.";
- } else {
- RTC_LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";
- }
-}
-
-void IntelligibilityEnhancer::SetCaptureNoiseEstimate(std::vector<float> noise,
- float gain) {
- RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
- for (auto& bin : noise) {
- bin *= gain;
- }
- // Disregarding return value since buffer overflow is acceptable, because it
- // is not critical to get each noise estimate.
- if (noise_estimation_queue_.Insert(&noise)) {
- };
-}
-
-void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {
- RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());
- while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
- noise_power_estimator_.Step(noise_estimation_buffer_.data());
- }
- float* const* low_band = audio->split_channels_f(kBand0To8kHz);
- is_speech_ = IsSpeech(low_band[0]);
- render_mangler_->ProcessChunk(low_band, low_band);
- DelayHighBands(audio);
-}
-
-void IntelligibilityEnhancer::ProcessAudioBlock(
- const std::complex<float>* const* in_block,
- size_t in_channels,
- size_t frames,
- size_t /* out_channels */,
- std::complex<float>* const* out_block) {
- RTC_DCHECK_EQ(freqs_, frames);
- if (is_speech_) {
- clear_power_estimator_.Step(in_block[0]);
- }
- SnrBasedEffectActivation();
- ++num_chunks_;
- if (is_active_) {
- ++num_active_chunks_;
- if (num_chunks_ % kGainUpdatePeriod == 0) {
- MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
- filtered_clear_pow_.data());
- MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
- filtered_noise_pow_.data());
- SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
- const float power_target =
- std::accumulate(filtered_clear_pow_.data(),
- filtered_clear_pow_.data() + bank_size_, 0.f);
- const float power_top =
- DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
- SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
- const float power_bot =
- DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
- if (power_target >= power_bot && power_target <= power_top) {
- SolveForLambda(power_target);
- UpdateErbGains();
- } // Else experiencing power underflow, so do nothing.
- }
- }
- for (size_t i = 0; i < in_channels; ++i) {
- gain_applier_.Apply(in_block[i], out_block[i]);
- }
-}
-
-void IntelligibilityEnhancer::SnrBasedEffectActivation() {
- const float* clear_psd = clear_power_estimator_.power().data();
- const float* noise_psd = noise_power_estimator_.power().data();
- const float clear_power = std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
- const float noise_power = std::accumulate(noise_psd, noise_psd + freqs_, 0.f);
- snr_ = kDecayRate * snr_ +
- (1.f - kDecayRate) * clear_power /
- (noise_power + std::numeric_limits<float>::epsilon());
- if (is_active_) {
- if (snr_ > kMaxActiveSNR) {
- RTC_LOG(LS_INFO) << "Intelligibility Enhancer was deactivated at chunk "
- << num_chunks_;
- is_active_ = false;
- // Set the target gains to unity.
- float* gains = gain_applier_.target();
- for (size_t i = 0; i < freqs_; ++i) {
- gains[i] = 1.f;
- }
- }
- } else {
- if (snr_ < kMinInactiveSNR) {
- RTC_LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
- << num_chunks_;
- is_active_ = true;
- }
- }
-}
-
-void IntelligibilityEnhancer::SolveForLambda(float power_target) {
- const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
- const int kMaxIters = 100; // for these, based on experiments.
-
- const float reciprocal_power_target =
- 1.f / (power_target + std::numeric_limits<float>::epsilon());
- float lambda_bot = kLambdaBot;
- float lambda_top = kLambdaTop;
- float power_ratio = 2.f; // Ratio of achieved power to target power.
- int iters = 0;
- while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
- const float lambda = (lambda_bot + lambda_top) / 2.f;
- SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
- const float power =
- DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
- if (power < power_target) {
- lambda_bot = lambda;
- } else {
- lambda_top = lambda;
- }
- power_ratio = std::fabs(power * reciprocal_power_target);
- ++iters;
- }
-}
-
-void IntelligibilityEnhancer::UpdateErbGains() {
- // (ERB gain) = filterbank' * (freq gain)
- float* gains = gain_applier_.target();
- for (size_t i = 0; i < freqs_; ++i) {
- gains[i] = 0.f;
- for (size_t j = 0; j < bank_size_; ++j) {
- gains[i] += render_filter_bank_[j][i] * gains_eq_[j];
- }
- }
-}
-
-size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
- size_t erb_resolution) {
- float freq_limit = sample_rate / 2000.f;
- size_t erb_scale = static_cast<size_t>(ceilf(
- 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f));
- return erb_scale * erb_resolution;
-}
-
-std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
- size_t num_freqs) {
- std::vector<std::vector<float>> filter_bank(bank_size_);
- size_t lf = 1, rf = 4;
-
- for (size_t i = 0; i < bank_size_; ++i) {
- float abs_temp = fabsf((i + 1.f) / static_cast<float>(kErbResolution));
- center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
- center_freqs_[i] -= 14678.49f;
- }
- float last_center_freq = center_freqs_[bank_size_ - 1];
- for (size_t i = 0; i < bank_size_; ++i) {
- center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
- }
-
- for (size_t i = 0; i < bank_size_; ++i) {
- filter_bank[i].resize(num_freqs);
- }
-
- for (size_t i = 1; i <= bank_size_; ++i) {
- size_t lll = static_cast<size_t>(
- round(center_freqs_[rtc::SafeMax<size_t>(1, i - lf) - 1] * num_freqs /
- (0.5f * sample_rate_hz_)));
- size_t ll = static_cast<size_t>(
- round(center_freqs_[rtc::SafeMax<size_t>(1, i) - 1] * num_freqs /
- (0.5f * sample_rate_hz_)));
- lll = rtc::SafeClamp<size_t>(lll, 1, num_freqs) - 1;
- ll = rtc::SafeClamp<size_t>(ll, 1, num_freqs) - 1;
-
- size_t rrr = static_cast<size_t>(
- round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + rf) - 1] *
- num_freqs / (0.5f * sample_rate_hz_)));
- size_t rr = static_cast<size_t>(
- round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + 1) - 1] *
- num_freqs / (0.5f * sample_rate_hz_)));
- rrr = rtc::SafeClamp<size_t>(rrr, 1, num_freqs) - 1;
- rr = rtc::SafeClamp<size_t>(rr, 1, num_freqs) - 1;
-
- float step = ll == lll ? 0.f : 1.f / (ll - lll);
- float element = 0.f;
- for (size_t j = lll; j <= ll; ++j) {
- filter_bank[i - 1][j] = element;
- element += step;
- }
- step = rr == rrr ? 0.f : 1.f / (rrr - rr);
- element = 1.f;
- for (size_t j = rr; j <= rrr; ++j) {
- filter_bank[i - 1][j] = element;
- element -= step;
- }
- for (size_t j = ll; j <= rr; ++j) {
- filter_bank[i - 1][j] = 1.f;
- }
- }
-
- for (size_t i = 0; i < num_freqs; ++i) {
- float sum = 0.f;
- for (size_t j = 0; j < bank_size_; ++j) {
- sum += filter_bank[j][i];
- }
- for (size_t j = 0; j < bank_size_; ++j) {
- filter_bank[j][i] /= sum;
- }
- }
- return filter_bank;
-}
-
-void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
- size_t start_freq,
- float* sols) {
- const float kMinPower = 1e-5f;
-
- const float* pow_x0 = filtered_clear_pow_.data();
- const float* pow_n0 = filtered_noise_pow_.data();
-
- for (size_t n = 0; n < start_freq; ++n) {
- sols[n] = 1.f;
- }
-
- // Analytic solution for optimal gains. See paper for derivation.
- for (size_t n = start_freq; n < bank_size_; ++n) {
- if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {
- sols[n] = 1.f;
- } else {
- const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
- lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
- const float beta0 =
- lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
- const float alpha0 =
- lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
- RTC_DCHECK_LT(alpha0, 0.f);
- // The quadratic equation should always have real roots, but to guard
- // against numerical errors we limit it to a minimum of zero.
- sols[n] = std::max(
- 0.f, (-beta0 - std::sqrt(std::max(
- 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) /
- (2.f * alpha0));
- }
- }
-}
-
-bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
- FloatToS16(audio, chunk_length_, audio_s16_.data());
- vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
- if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
- chunks_since_voice_ = 0;
- } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
- ++chunks_since_voice_;
- }
- return chunks_since_voice_ < kSpeechOffsetDelay;
-}
-
-void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
- RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1);
- for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) {
- Band band = static_cast<Band>(i + 1);
- high_bands_buffers_[i]->Delay(audio->split_channels_f(band), chunk_length_);
- }
-}
-
-} // namespace webrtc
diff --git a/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/modules/audio_processing/intelligibility/intelligibility_enhancer.h
deleted file mode 100644
index 3513092..0000000
--- a/modules/audio_processing/intelligibility/intelligibility_enhancer.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
-#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
-
-#include <complex>
-#include <memory>
-#include <vector>
-
-#include "common_audio/channel_buffer.h"
-#include "common_audio/lapped_transform.h"
-#include "modules/audio_processing/audio_buffer.h"
-#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
-#include "modules/audio_processing/render_queue_item_verifier.h"
-#include "modules/audio_processing/vad/voice_activity_detector.h"
-#include "rtc_base/swap_queue.h"
-
-namespace webrtc {
-
-// Speech intelligibility enhancement module. Reads render and capture
-// audio streams and modifies the render stream with a set of gains per
-// frequency bin to enhance speech against the noise background.
-// Details of the model and algorithm can be found in the original paper:
-// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
-class IntelligibilityEnhancer : public LappedTransform::Callback {
- public:
- IntelligibilityEnhancer(int sample_rate_hz,
- size_t num_render_channels,
- size_t num_bands,
- size_t num_noise_bins);
-
- ~IntelligibilityEnhancer() override;
-
- // Sets the capture noise magnitude spectrum estimate.
- void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);
-
- // Reads chunk of speech in time domain and updates with modified signal.
- void ProcessRenderAudio(AudioBuffer* audio);
- bool active() const;
-
- protected:
- // All in frequency domain, receives input |in_block|, applies
- // intelligibility enhancement, and writes result to |out_block|.
- void ProcessAudioBlock(const std::complex<float>* const* in_block,
- size_t in_channels,
- size_t frames,
- size_t out_channels,
- std::complex<float>* const* out_block) override;
-
- private:
- FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate);
- FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
- FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
- FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
- TestNoiseGainHasExpectedResult);
- FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
- TestAllBandsHaveSameDelay);
-
- // Updates the SNR estimation and enables or disables this component using a
- // hysteresis.
- void SnrBasedEffectActivation();
-
- // Bisection search for optimal |lambda|.
- void SolveForLambda(float power_target);
-
- // Transforms freq gains to ERB gains.
- void UpdateErbGains();
-
- // Returns number of ERB filters.
- static size_t GetBankSize(int sample_rate, size_t erb_resolution);
-
- // Initializes ERB filterbank.
- std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);
-
- // Analytically solves quadratic for optimal gains given |lambda|.
- // Negative gains are set to 0. Stores the results in |sols|.
- void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
-
- // Returns true if the audio is speech.
- bool IsSpeech(const float* audio);
-
- // Delays the high bands to compensate for the processing delay in the low
- // band.
- void DelayHighBands(AudioBuffer* audio);
-
- static const size_t kMaxNumNoiseEstimatesToBuffer = 5;
-
- const size_t freqs_; // Num frequencies in frequency domain.
- const size_t num_noise_bins_;
- const size_t chunk_length_; // Chunk size in samples.
- const size_t bank_size_; // Num ERB filters.
- const int sample_rate_hz_;
- const size_t num_render_channels_;
-
- intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
- intelligibility::PowerEstimator<float> noise_power_estimator_;
- std::vector<float> filtered_clear_pow_;
- std::vector<float> filtered_noise_pow_;
- std::vector<float> center_freqs_;
- std::vector<std::vector<float>> capture_filter_bank_;
- std::vector<std::vector<float>> render_filter_bank_;
- size_t start_freq_;
-
- std::vector<float> gains_eq_; // Pre-filter modified gains.
- intelligibility::GainApplier gain_applier_;
-
- std::unique_ptr<LappedTransform> render_mangler_;
-
- VoiceActivityDetector vad_;
- std::vector<int16_t> audio_s16_;
- size_t chunks_since_voice_;
- bool is_speech_;
- float snr_;
- bool is_active_;
-
- unsigned long int num_chunks_;
- unsigned long int num_active_chunks_;
-
- std::vector<float> noise_estimation_buffer_;
- SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>
- noise_estimation_queue_;
-
- std::vector<std::unique_ptr<intelligibility::DelayBuffer>>
- high_bands_buffers_;
-};
-
-} // namespace webrtc
-
-#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
diff --git a/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
deleted file mode 100644
index 98a8dae..0000000
--- a/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
+++ /dev/null
@@ -1,536 +0,0 @@
-/*
- * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-
-#include <algorithm>
-#include <memory>
-#include <vector>
-
-#include "api/array_view.h"
-#include "common_audio/signal_processing/include/signal_processing_library.h"
-#include "modules/audio_processing/audio_buffer.h"
-#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
-#include "modules/audio_processing/noise_suppression_impl.h"
-#include "modules/audio_processing/test/audio_buffer_tools.h"
-#include "modules/audio_processing/test/bitexactness_tools.h"
-#include "rtc_base/arraysize.h"
-#include "test/gtest.h"
-
-namespace webrtc {
-
-namespace {
-
-// Target output for ERB create test. Generated with matlab.
-const float kTestCenterFreqs[] = {
- 14.5213f, 29.735f, 45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f,
- 137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f,
- 309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f,
- 551.371f, 593.293f, 637.386f, 683.77f, 732.581f, 783.96f, 838.06f,
- 895.046f, 955.09f, 1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f,
- 1391.22f, 1478.83f, 1571.5f, 1669.55f, 1773.37f, 1883.37f, 2000.f};
-const float kTestFilterBank[][33] = {
- {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f,
- 0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f,
- 0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f,
- 0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f,
- 0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f,
- 0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.307692f, 0.333333f,
- 0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.166667f, 0.363636f, 0.333333f, 0.242424f,
- 0.190476f, 0.133333f, 0.0689655f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f,
- 0.0714286f, 0.f, 0.f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f,
- 0.125f, 0.0655738f, 0.f, 0.f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.15873f, 0.333333f, 0.344828f, 0.357143f,
- 0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.172414f, 0.357143f,
- 0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f},
- {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}};
-static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
- "Test filterbank badly initialized.");
-
-// Target output for gain solving test. Generated with matlab.
-const size_t kTestStartFreq = 12; // Lowest integral frequency for ERBs.
-const float kTestZeroVar = 1.f;
-const float kTestNonZeroVarLambdaTop[] = {
- 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
- 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
-static_assert(arraysize(kTestCenterFreqs) ==
- arraysize(kTestNonZeroVarLambdaTop),
- "Power test data badly initialized.");
-const float kMaxTestError = 0.005f;
-
-// Enhancer initialization parameters.
-const int kSamples = 10000;
-const int kSampleRate = 4000;
-const int kNumChannels = 1;
-const int kFragmentSize = kSampleRate / 100;
-const size_t kNumNoiseBins = 129;
-const size_t kNumBands = 1;
-
-// Number of frames to process in the bitexactness tests.
-const size_t kNumFramesToProcess = 1000;
-
-int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {
- return (sample_rate_hz > AudioProcessing::kSampleRate16kHz
- ? AudioProcessing::kSampleRate16kHz
- : sample_rate_hz);
-}
-
-// Process one frame of data and produce the output.
-void ProcessOneFrame(int sample_rate_hz,
- AudioBuffer* render_audio_buffer,
- AudioBuffer* capture_audio_buffer,
- NoiseSuppressionImpl* noise_suppressor,
- IntelligibilityEnhancer* intelligibility_enhancer) {
- if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
- render_audio_buffer->SplitIntoFrequencyBands();
- capture_audio_buffer->SplitIntoFrequencyBands();
- }
-
- intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);
-
- noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
- noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
-
- intelligibility_enhancer->SetCaptureNoiseEstimate(
- noise_suppressor->NoiseEstimate(), 0);
-
- if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
- render_audio_buffer->MergeFrequencyBands();
- }
-}
-
-// Processes a specified amount of frames, verifies the results and reports
-// any errors.
-void RunBitexactnessTest(int sample_rate_hz,
- size_t num_channels,
- rtc::ArrayView<const float> output_reference) {
- const StreamConfig render_config(sample_rate_hz, num_channels, false);
- AudioBuffer render_buffer(
- render_config.num_frames(), render_config.num_channels(),
- render_config.num_frames(), render_config.num_channels(),
- render_config.num_frames());
- test::InputAudioFile render_file(
- test::GetApmRenderTestVectorFileName(sample_rate_hz));
- std::vector<float> render_input(render_buffer.num_frames() *
- render_buffer.num_channels());
-
- const StreamConfig capture_config(sample_rate_hz, num_channels, false);
- AudioBuffer capture_buffer(
- capture_config.num_frames(), capture_config.num_channels(),
- capture_config.num_frames(), capture_config.num_channels(),
- capture_config.num_frames());
- test::InputAudioFile capture_file(
- test::GetApmCaptureTestVectorFileName(sample_rate_hz));
- std::vector<float> capture_input(render_buffer.num_frames() *
- capture_buffer.num_channels());
-
- rtc::CriticalSection crit_capture;
- NoiseSuppressionImpl noise_suppressor(&crit_capture);
- noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz);
- noise_suppressor.Enable(true);
-
- IntelligibilityEnhancer intelligibility_enhancer(
- IntelligibilityEnhancerSampleRate(sample_rate_hz),
- render_config.num_channels(), kNumBands,
- NoiseSuppressionImpl::num_noise_bins());
-
- for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
- ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
- render_buffer.num_channels(), &render_file,
- render_input);
- ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(),
- capture_buffer.num_channels(), &capture_file,
- capture_input);
-
- test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
- test::CopyVectorToAudioBuffer(capture_config, capture_input,
- &capture_buffer);
-
- ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
- &noise_suppressor, &intelligibility_enhancer);
- }
-
- // Extract and verify the test results.
- std::vector<float> render_output;
- test::ExtractVectorFromAudioBuffer(render_config, &render_buffer,
- &render_output);
-
- const float kElementErrorBound = 1.f / static_cast<float>(1 << 15);
-
- // Compare the output with the reference. Only the first values of the output
- // from last frame processed are compared in order not having to specify all
- // preceeding frames as testvectors. As the algorithm being tested has a
- // memory, testing only the last frame implicitly also tests the preceeding
- // frames.
- EXPECT_TRUE(test::VerifyDeinterleavedArray(
- render_buffer.num_frames(), render_config.num_channels(),
- output_reference, render_output, kElementErrorBound));
-}
-
-float float_rand() {
- return std::rand() * 2.f / RAND_MAX - 1;
-}
-
-} // namespace
-
-class IntelligibilityEnhancerTest : public ::testing::Test {
- protected:
- IntelligibilityEnhancerTest()
- : clear_buffer_(kFragmentSize,
- kNumChannels,
- kFragmentSize,
- kNumChannels,
- kFragmentSize),
- stream_config_(kSampleRate, kNumChannels),
- clear_data_(kSamples),
- noise_data_(kNumNoiseBins),
- orig_data_(kSamples) {
- std::srand(1);
- enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
- kNumNoiseBins));
- }
-
- bool CheckUpdate() {
- enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
- kNumNoiseBins));
- float* clear_cursor = clear_data_.data();
- for (int i = 0; i < kSamples; i += kFragmentSize) {
- enh_->SetCaptureNoiseEstimate(noise_data_, 1);
- clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
- enh_->ProcessRenderAudio(&clear_buffer_);
- clear_buffer_.CopyTo(stream_config_, &clear_cursor);
- clear_cursor += kFragmentSize;
- }
- for (int i = initial_delay_; i < kSamples; i++) {
- if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
- kMaxTestError) {
- return true;
- }
- }
- return false;
- }
-
- std::unique_ptr<IntelligibilityEnhancer> enh_;
- // Render clean speech buffer.
- AudioBuffer clear_buffer_;
- StreamConfig stream_config_;
- std::vector<float> clear_data_;
- std::vector<float> noise_data_;
- std::vector<float> orig_data_;
- size_t initial_delay_;
-};
-
-// For each class of generated data, tests that render stream is updated when
-// it should be.
-TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
- initial_delay_ = enh_->render_mangler_->initial_delay();
- std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
- std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
- std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
- EXPECT_FALSE(CheckUpdate());
- std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
- orig_data_ = clear_data_;
- EXPECT_FALSE(CheckUpdate());
- std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
- orig_data_ = clear_data_;
- std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
- FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());
- EXPECT_TRUE(CheckUpdate());
-}
-
-// Tests ERB bank creation, comparing against matlab output.
-TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
- ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);
- for (size_t i = 0; i < enh_->bank_size_; ++i) {
- EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);
- ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);
- for (size_t j = 0; j < enh_->freqs_; ++j) {
- EXPECT_NEAR(kTestFilterBank[i][j], enh_->render_filter_bank_[i][j],
- kMaxTestError);
- }
- }
-}
-
-// Tests analytic solution for optimal gains, comparing
-// against matlab output.
-TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
- ASSERT_EQ(kTestStartFreq, enh_->start_freq_);
- std::vector<float> sols(enh_->bank_size_);
- float lambda = -0.001f;
- for (size_t i = 0; i < enh_->bank_size_; i++) {
- enh_->filtered_clear_pow_[i] = 0.f;
- enh_->filtered_noise_pow_[i] = 0.f;
- }
- enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
- for (size_t i = 0; i < enh_->bank_size_; i++) {
- EXPECT_NEAR(kTestZeroVar, sols[i], kMaxTestError);
- }
- for (size_t i = 0; i < enh_->bank_size_; i++) {
- enh_->filtered_clear_pow_[i] = static_cast<float>(i + 1);
- enh_->filtered_noise_pow_[i] = static_cast<float>(enh_->bank_size_ - i);
- }
- enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
- for (size_t i = 0; i < enh_->bank_size_; i++) {
- EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
- }
- lambda = -1.f;
- enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
- for (size_t i = 0; i < enh_->bank_size_; i++) {
- EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
- }
-}
-
-TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
- const float kGain = 2.f;
- const float kTolerance = 0.007f;
- std::vector<float> noise(kNumNoiseBins);
- std::vector<float> noise_psd(kNumNoiseBins);
- std::generate(noise.begin(), noise.end(), float_rand);
- for (size_t i = 0; i < kNumNoiseBins; ++i) {
- noise_psd[i] = kGain * kGain * noise[i] * noise[i];
- }
- float* clear_cursor = clear_data_.data();
- for (size_t i = 0; i < kNumFramesToProcess; ++i) {
- enh_->SetCaptureNoiseEstimate(noise, kGain);
- clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
- enh_->ProcessRenderAudio(&clear_buffer_);
- }
- const std::vector<float>& estimated_psd =
- enh_->noise_power_estimator_.power();
- for (size_t i = 0; i < kNumNoiseBins; ++i) {
- EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i],
- kTolerance);
- }
-}
-
-TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) {
- const int kTestSampleRate = AudioProcessing::kSampleRate32kHz;
- const int kTestSplitRate = AudioProcessing::kSampleRate16kHz;
- const size_t kTestNumBands =
- rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate);
- const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100);
- const size_t kTestSplitFragmentSize =
- rtc::CheckedDivExact(kTestSplitRate, 100);
- enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels,
- kTestNumBands, kNumNoiseBins));
- size_t initial_delay = enh_->render_mangler_->initial_delay();
- std::vector<float> rand_gen_buf(kTestFragmentSize);
- AudioBuffer original_buffer(kTestFragmentSize, kNumChannels,
- kTestFragmentSize, kNumChannels,
- kTestFragmentSize);
- AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize,
- kNumChannels, kTestFragmentSize);
- for (size_t i = 0u; i < kTestNumBands; ++i) {
- std::generate(rand_gen_buf.begin(), rand_gen_buf.end(), float_rand);
- original_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),
- rand_gen_buf.size());
- audio_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),
- rand_gen_buf.size());
- }
- enh_->ProcessRenderAudio(&audio_buffer);
- for (size_t i = 0u; i < kTestNumBands; ++i) {
- const float* original_ptr = original_buffer.split_bands_const_f(0)[i];
- const float* audio_ptr = audio_buffer.split_bands_const_f(0)[i];
- for (size_t j = initial_delay; j < kTestSplitFragmentSize; ++j) {
- EXPECT_LT(std::fabs(original_ptr[j - initial_delay] - audio_ptr[j]),
- kMaxTestError);
- }
- }
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
- const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference);
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) {
- const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference);
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) {
- const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference);
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) {
- const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference);
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) {
- const float kOutputReference[] = {0.021454f, 0.035919f, 0.026428f,
- -0.000641f, 0.000366f, 0.000641f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference);
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) {
- const float kOutputReference[] = {0.021362f, 0.035736f, 0.023895f,
- -0.001404f, -0.001465f, 0.000549f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference);
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) {
- const float kOutputReference[] = {0.030641f, 0.027406f, 0.028321f,
- -0.001343f, -0.004578f, 0.000977f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference);
-}
-
-TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {
- const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,
- -0.012975f, -0.015940f, -0.017820f};
-
- RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);
-}
-
-} // namespace webrtc
diff --git a/modules/audio_processing/intelligibility/intelligibility_utils.cc b/modules/audio_processing/intelligibility/intelligibility_utils.cc
deleted file mode 100644
index b606d95..0000000
--- a/modules/audio_processing/intelligibility/intelligibility_utils.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-#include <algorithm>
-#include <limits>
-
-#include "rtc_base/numerics/safe_minmax.h"
-
-namespace webrtc {
-
-namespace intelligibility {
-
-namespace {
-
-const float kMinFactor = 0.01f;
-const float kMaxFactor = 100.f;
-
-// Return |current| changed towards |target|, with the relative change being at
-// most |limit|.
-float UpdateFactor(float target, float current, float limit) {
- const float gain = target / (current + std::numeric_limits<float>::epsilon());
- const float clamped_gain = rtc::SafeClamp(gain, 1 - limit, 1 + limit);
- return rtc::SafeClamp(current * clamped_gain, kMinFactor, kMaxFactor);
-}
-
-} // namespace
-
-template <typename T>
-PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
- : power_(num_freqs, 0.f), decay_(decay) {}
-
-template <typename T>
-void PowerEstimator<T>::Step(const T* data) {
- for (size_t i = 0; i < power_.size(); ++i) {
- power_[i] = decay_ * power_[i] +
- (1.f - decay_) * std::abs(data[i]) * std::abs(data[i]);
- }
-}
-
-template class PowerEstimator<float>;
-template class PowerEstimator<std::complex<float>>;
-
-GainApplier::GainApplier(size_t freqs, float relative_change_limit)
- : num_freqs_(freqs),
- relative_change_limit_(relative_change_limit),
- target_(freqs, 1.f),
- current_(freqs, 1.f) {}
-
-GainApplier::~GainApplier() {}
-
-void GainApplier::Apply(const std::complex<float>* in_block,
- std::complex<float>* out_block) {
- for (size_t i = 0; i < num_freqs_; ++i) {
- current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_);
- out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i];
- }
-}
-
-DelayBuffer::DelayBuffer(size_t delay, size_t num_channels)
- : buffer_(num_channels, std::vector<float>(delay, 0.f)), read_index_(0u) {}
-
-DelayBuffer::~DelayBuffer() {}
-
-void DelayBuffer::Delay(float* const* data, size_t length) {
- size_t sample_index = read_index_;
- for (size_t i = 0u; i < buffer_.size(); ++i) {
- sample_index = read_index_;
- for (size_t j = 0u; j < length; ++j) {
- float swap = data[i][j];
- data[i][j] = buffer_[i][sample_index];
- buffer_[i][sample_index] = swap;
- if (++sample_index == buffer_.size()) {
- sample_index = 0u;
- }
- }
- }
- read_index_ = sample_index;
-}
-
-} // namespace intelligibility
-
-} // namespace webrtc
diff --git a/modules/audio_processing/intelligibility/intelligibility_utils.h b/modules/audio_processing/intelligibility/intelligibility_utils.h
deleted file mode 100644
index 4dc17d5..0000000
--- a/modules/audio_processing/intelligibility/intelligibility_utils.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
-#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
-
-#include <complex>
-#include <vector>
-
-namespace webrtc {
-
-namespace intelligibility {
-
-// Internal helper for computing the power of a stream of arrays.
-// The result is an array of power per position: the i-th power is the power of
-// the stream of data on the i-th positions in the input arrays.
-template <typename T>
-class PowerEstimator {
- public:
- // Construct an instance for the given input array length (|freqs|), with the
- // appropriate parameters. |decay| is the forgetting factor.
- PowerEstimator(size_t freqs, float decay);
-
- // Add a new data point to the series.
- void Step(const T* data);
-
- // The current power array.
- const std::vector<float>& power() { return power_; };
-
- private:
- // The current power array.
- std::vector<float> power_;
-
- const float decay_;
-};
-
-// Helper class for smoothing gain changes. On each application step, the
-// currently used gains are changed towards a set of settable target gains,
-// constrained by a limit on the relative changes.
-class GainApplier {
- public:
- GainApplier(size_t freqs, float relative_change_limit);
-
- ~GainApplier();
-
- // Copy |in_block| to |out_block|, multiplied by the current set of gains,
- // and step the current set of gains towards the target set.
- void Apply(const std::complex<float>* in_block,
- std::complex<float>* out_block);
-
- // Return the current target gain set. Modify this array to set the targets.
- float* target() { return target_.data(); }
-
- private:
- const size_t num_freqs_;
- const float relative_change_limit_;
- std::vector<float> target_;
- std::vector<float> current_;
-};
-
-// Helper class to delay a signal by an integer number of samples.
-class DelayBuffer {
- public:
- DelayBuffer(size_t delay, size_t num_channels);
-
- ~DelayBuffer();
-
- void Delay(float* const* data, size_t length);
-
- private:
- std::vector<std::vector<float>> buffer_;
- size_t read_index_;
-};
-
-} // namespace intelligibility
-
-} // namespace webrtc
-
-#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
diff --git a/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc b/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
deleted file mode 100644
index fea394c..0000000
--- a/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <cmath>
-#include <complex>
-#include <vector>
-
-#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
-#include "rtc_base/arraysize.h"
-#include "test/gtest.h"
-
-namespace webrtc {
-
-namespace intelligibility {
-
-std::vector<std::vector<std::complex<float>>> GenerateTestData(size_t freqs,
- size_t samples) {
- std::vector<std::vector<std::complex<float>>> data(samples);
- for (size_t i = 0; i < samples; ++i) {
- for (size_t j = 0; j < freqs; ++j) {
- const float val = 0.99f / ((i + 1) * (j + 1));
- data[i].push_back(std::complex<float>(val, val));
- }
- }
- return data;
-}
-
-// Tests PowerEstimator, for all power step types.
-TEST(IntelligibilityUtilsTest, TestPowerEstimator) {
- const size_t kFreqs = 10;
- const size_t kSamples = 100;
- const float kDecay = 0.5f;
- const std::vector<std::vector<std::complex<float>>> test_data(
- GenerateTestData(kFreqs, kSamples));
- PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay);
- EXPECT_EQ(0, power_estimator.power()[0]);
-
- // Makes sure Step is doing something.
- power_estimator.Step(test_data[0].data());
- for (size_t i = 1; i < kSamples; ++i) {
- power_estimator.Step(test_data[i].data());
- for (size_t j = 0; j < kFreqs; ++j) {
- EXPECT_GE(power_estimator.power()[j], 0.f);
- EXPECT_LE(power_estimator.power()[j], 1.f);
- }
- }
-}
-
-// Tests gain applier.
-TEST(IntelligibilityUtilsTest, TestGainApplier) {
- const size_t kFreqs = 10;
- const size_t kSamples = 100;
- const float kChangeLimit = 0.1f;
- GainApplier gain_applier(kFreqs, kChangeLimit);
- const std::vector<std::vector<std::complex<float>>> in_data(
- GenerateTestData(kFreqs, kSamples));
- std::vector<std::vector<std::complex<float>>> out_data(
- GenerateTestData(kFreqs, kSamples));
- for (size_t i = 0; i < kSamples; ++i) {
- gain_applier.Apply(in_data[i].data(), out_data[i].data());
- for (size_t j = 0; j < kFreqs; ++j) {
- EXPECT_GT(out_data[i][j].real(), 0.f);
- EXPECT_LT(out_data[i][j].real(), 1.f);
- EXPECT_GT(out_data[i][j].imag(), 0.f);
- EXPECT_LT(out_data[i][j].imag(), 1.f);
- }
- }
-}
-
-} // namespace intelligibility
-
-} // namespace webrtc
diff --git a/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
deleted file mode 100644
index b90449c..0000000
--- a/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "common_audio/channel_buffer.h"
-#include "common_audio/include/audio_util.h"
-#include "common_audio/wav_file.h"
-#include "modules/audio_processing/audio_buffer.h"
-#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
-#include "modules/audio_processing/noise_suppression_impl.h"
-#include "rtc_base/criticalsection.h"
-#include "rtc_base/flags.h"
-
-using std::complex;
-
-namespace webrtc {
-namespace {
-
-DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
-DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
-DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
-DEFINE_bool(help, false, "Print this message.");
-
-int int_main(int argc, char* argv[]) {
- if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) {
- return 1;
- }
- if (FLAG_help) {
- rtc::FlagList::Print(nullptr, false);
- return 0;
- }
- if (argc != 1) {
- printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
- return 0;
- }
-
- WavReader in_file(FLAG_clear_file);
- WavReader noise_file(FLAG_noise_file);
- WavWriter out_file(FLAG_out_file, in_file.sample_rate(),
- in_file.num_channels());
- rtc::CriticalSection crit;
- NoiseSuppressionImpl ns(&crit);
- IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u,
- NoiseSuppressionImpl::num_noise_bins());
- ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
- ns.Enable(true);
- const size_t in_samples = noise_file.sample_rate() / 100;
- const size_t noise_samples = noise_file.sample_rate() / 100;
- std::vector<float> in(in_samples * in_file.num_channels());
- std::vector<float> noise(noise_samples * noise_file.num_channels());
- ChannelBuffer<float> in_buf(in_samples, in_file.num_channels());
- ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels());
- AudioBuffer capture_audio(noise_samples, noise_file.num_channels(),
- noise_samples, noise_file.num_channels(),
- noise_samples);
- AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples,
- in_file.num_channels(), in_samples);
- StreamConfig noise_config(noise_file.sample_rate(),
- noise_file.num_channels());
- StreamConfig in_config(in_file.sample_rate(), in_file.num_channels());
- while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
- noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
- FloatS16ToFloat(noise.data(), noise.size(), noise.data());
- FloatS16ToFloat(in.data(), in.size(), in.data());
- Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
- in_buf.channels());
- Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(),
- noise_buf.channels());
- capture_audio.CopyFrom(noise_buf.channels(), noise_config);
- render_audio.CopyFrom(in_buf.channels(), in_config);
- ns.AnalyzeCaptureAudio(&capture_audio);
- ns.ProcessCaptureAudio(&capture_audio);
- enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1);
- enh.ProcessRenderAudio(&render_audio);
- render_audio.CopyTo(in_config, in_buf.channels());
- Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
- in.data());
- FloatToFloatS16(in.data(), in.size(), in.data());
- out_file.WriteSamples(in.data(), in.size());
- }
-
- return 0;
-}
-
-} // namespace
-} // namespace webrtc
-
-int main(int argc, char* argv[]) {
- return webrtc::int_main(argc, argv);
-}
diff --git a/modules/audio_processing/test/aec_dump_based_simulator.cc b/modules/audio_processing/test/aec_dump_based_simulator.cc
index fe7197c..743bbd0 100644
--- a/modules/audio_processing/test/aec_dump_based_simulator.cc
+++ b/modules/audio_processing/test/aec_dump_based_simulator.cc
@@ -427,16 +427,6 @@
}
}
- if (msg.has_intelligibility_enhancer_enabled() || settings_.use_ie) {
- bool enable = settings_.use_ie ? *settings_.use_ie
- : msg.intelligibility_enhancer_enabled();
- config.Set<Intelligibility>(new Intelligibility(enable));
- if (settings_.use_verbose_logging) {
- std::cout << " intelligibility_enhancer_enabled: "
- << (enable ? "true" : "false") << std::endl;
- }
- }
-
if (msg.has_hpf_enabled() || settings_.use_hpf) {
bool enable = settings_.use_hpf ? *settings_.use_hpf : msg.hpf_enabled();
apm_config.high_pass_filter.enabled = enable;
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index c860251..7ea7660 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -678,9 +678,6 @@
if (settings_.use_ts) {
config.Set<ExperimentalNs>(new ExperimentalNs(*settings_.use_ts));
}
- if (settings_.use_ie) {
- config.Set<Intelligibility>(new Intelligibility(*settings_.use_ie));
- }
if (settings_.use_agc2) {
apm_config.gain_controller2.enabled = *settings_.use_agc2;
apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db;
diff --git a/modules/audio_processing/test/audioproc_float_impl.cc b/modules/audio_processing/test/audioproc_float_impl.cc
index 57190ea..42d8093 100644
--- a/modules/audio_processing/test/audioproc_float_impl.cc
+++ b/modules/audio_processing/test/audioproc_float_impl.cc
@@ -87,9 +87,6 @@
DEFINE_int(ts,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the transient suppressor");
-DEFINE_int(ie,
- kParameterNotSpecifiedValue,
- "Activate (1) or deactivate(0) the intelligibility enhancer");
DEFINE_int(vad,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the voice activity detector");
@@ -247,7 +244,6 @@
SetSettingIfFlagSet(FLAG_hpf, &settings.use_hpf);
SetSettingIfFlagSet(FLAG_ns, &settings.use_ns);
SetSettingIfFlagSet(FLAG_ts, &settings.use_ts);
- SetSettingIfFlagSet(FLAG_ie, &settings.use_ie);
SetSettingIfFlagSet(FLAG_vad, &settings.use_vad);
SetSettingIfFlagSet(FLAG_le, &settings.use_le);
SetSettingIfSpecified(FLAG_aec_suppression_level,
diff --git a/modules/audio_processing/test/debug_dump_replayer.cc b/modules/audio_processing/test/debug_dump_replayer.cc
index d88330a..c7767f7 100644
--- a/modules/audio_processing/test/debug_dump_replayer.cc
+++ b/modules/audio_processing/test/debug_dump_replayer.cc
@@ -186,10 +186,6 @@
config.Set<ExtendedFilter>(
new ExtendedFilter(msg.aec_extended_filter_enabled()));
- RTC_CHECK(msg.has_intelligibility_enhancer_enabled());
- config.Set<Intelligibility>(
- new Intelligibility(msg.intelligibility_enhancer_enabled()));
-
// We only create APM once, since changes on these fields should not
// happen in current implementation.
if (!apm_.get()) {
diff --git a/modules/audio_processing/test/py_quality_assessment/README.md b/modules/audio_processing/test/py_quality_assessment/README.md
index 6fa0b70..4156112 100644
--- a/modules/audio_processing/test/py_quality_assessment/README.md
+++ b/modules/audio_processing/test/py_quality_assessment/README.md
@@ -95,7 +95,7 @@
one](https://matplotlib.org/mpl_examples/pylab_examples/boxplot_demo_06.png).
Suppose some scores come from running the APM simulator `audioproc_f` with
-or without the intelligibility enhancer: `--ie=1` or `--ie=0`. Then two boxplots
+or without the level controller: `--lc=1` or `--lc=0`. Then two boxplots
side by side can be generated with
```
@@ -103,7 +103,7 @@
-o /path/to/output
-v <score_name>
-n /path/to/dir/with/apm_configs
- -z ie
+ -z lc
```
## Troubleshooting
diff --git a/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py b/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py
index 4017747..698579b 100755
--- a/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py
+++ b/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py
@@ -36,7 +36,6 @@
settings.use_bf = rtc::Optional<bool>(false);
settings.use_ed = rtc::Optional<bool>(false);
settings.use_hpf = rtc::Optional<bool>(true);
- settings.use_ie = rtc::Optional<bool>(false);
settings.use_le = rtc::Optional<bool>(true);
settings.use_ns = rtc::Optional<bool>(true);
settings.use_ts = rtc::Optional<bool>(true);
@@ -83,7 +82,6 @@
'with_drift_compensation': {'-drift_compensation': 1,},
'with_residual_echo_detector': {'-ed': 1,},
'with_AEC_extended_filter': {'-extended_filter': 1,},
- 'with_intelligibility_enhancer': {'-ie': 1,},
'with_LC': {'-lc': 1,},
'with_refined_adaptive_filter': {'-refined_adaptive_filter': 1,},
}
diff --git a/rtc_tools/unpack_aecdump/unpack.cc b/rtc_tools/unpack_aecdump/unpack.cc
index 44f8346..0367cc4 100644
--- a/rtc_tools/unpack_aecdump/unpack.cc
+++ b/rtc_tools/unpack_aecdump/unpack.cc
@@ -289,7 +289,6 @@
PRINT_CONFIG(ns_enabled);
PRINT_CONFIG(ns_level);
PRINT_CONFIG(transient_suppression_enabled);
- PRINT_CONFIG(intelligibility_enhancer_enabled);
PRINT_CONFIG(pre_amplifier_enabled);
PRINT_CONFIG_FLOAT(pre_amplifier_fixed_gain_factor);
diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc
index bcbfabb..e24f837 100644
--- a/test/fuzzers/audio_processing_configs_fuzzer.cc
+++ b/test/fuzzers/audio_processing_configs_fuzzer.cc
@@ -63,7 +63,7 @@
bool ef = fuzz_data->ReadOrDefaultValue(true);
bool raf = fuzz_data->ReadOrDefaultValue(true);
static_cast<void>(fuzz_data->ReadOrDefaultValue(true));
- bool ie = fuzz_data->ReadOrDefaultValue(true);
+ static_cast<void>(fuzz_data->ReadOrDefaultValue(true));
bool red = fuzz_data->ReadOrDefaultValue(true);
bool hpf = fuzz_data->ReadOrDefaultValue(true);
bool aec3 = fuzz_data->ReadOrDefaultValue(true);
@@ -123,7 +123,6 @@
config.Set<ExtendedFilter>(new ExtendedFilter(ef));
config.Set<RefinedAdaptiveFilter>(new RefinedAdaptiveFilter(raf));
config.Set<DelayAgnostic>(new DelayAgnostic(true));
- config.Set<Intelligibility>(new Intelligibility(ie));
std::unique_ptr<AudioProcessing> apm(
AudioProcessingBuilder()
diff --git a/tools_webrtc/mb/mb_config.pyl b/tools_webrtc/mb/mb_config.pyl
index ebdafe1..56c8583 100644
--- a/tools_webrtc/mb/mb_config.pyl
+++ b/tools_webrtc/mb/mb_config.pyl
@@ -53,8 +53,6 @@
# "More configs" bots will build all the following configs in sequence.
# This is using MB's "phases" feature.
'Linux (more configs)': {
- 'intelligibility_enhancer_no_include_tests':
- 'intelligibility_enhancer_no_include_tests_x64',
'bwe_test_logging':
'bwe_test_logging_x64',
'dummy_audio_file_devices_no_protobuf':
@@ -74,8 +72,6 @@
'Android32 Builder x86 (dbg)': 'android_debug_static_bot_x86',
'Android64 Builder x64 (dbg)': 'android_debug_static_bot_x64',
'Android32 (more configs)': {
- 'intelligibility_enhancer_no_include_tests':
- 'intelligibility_enhancer_no_include_tests_android_arm',
'bwe_test_logging':
'bwe_test_logging_android_arm',
'dummy_audio_file_devices_no_protobuf':
@@ -95,8 +91,6 @@
'Win64 Release (Clang)': 'win_clang_release_bot_x64',
'Win32 ASan': 'win_asan_clang_release_bot_x86',
'Win (more configs)': {
- 'intelligibility_enhancer_no_include_tests':
- 'intelligibility_enhancer_no_include_tests_x86',
'bwe_test_logging':
'bwe_test_logging_x86',
'dummy_audio_file_devices_no_protobuf':
@@ -186,8 +180,6 @@
'linux_experimental': 'release_bot_x64',
'linux_libfuzzer_rel': 'libfuzzer_asan_release_bot_x64',
'linux_more_configs': {
- 'intelligibility_enhancer_no_include_tests':
- 'intelligibility_enhancer_no_include_tests_x64',
'bwe_test_logging':
'bwe_test_logging_x64',
'dummy_audio_file_devices_no_protobuf':
@@ -209,8 +201,6 @@
'android_arm64_rel': 'android_release_bot_arm64',
'android_experimental': 'android_release_bot_arm',
'android_more_configs': {
- 'intelligibility_enhancer_no_include_tests':
- 'intelligibility_enhancer_no_include_tests_android_arm',
'bwe_test_logging':
'bwe_test_logging_android_arm',
'dummy_audio_file_devices_no_protobuf':
@@ -237,8 +227,6 @@
'win_x64_win8': 'debug_bot_x64',
'win_x64_win10': 'debug_bot_x64',
'win_more_configs': {
- 'intelligibility_enhancer_no_include_tests':
- 'intelligibility_enhancer_no_include_tests_x86',
'bwe_test_logging':
'bwe_test_logging_x86',
'dummy_audio_file_devices_no_protobuf':
@@ -402,9 +390,6 @@
],
# More configs
- 'intelligibility_enhancer_no_include_tests_x64': [
- 'debug_bot', 'x64', 'intelligibility_enhancer', 'no_include_tests'
- ],
'bwe_test_logging_x64': [
'debug_bot', 'x64', 'bwe_test_logging'
],
@@ -415,9 +400,6 @@
'debug_bot', 'x64', 'rtti', 'no_sctp'
],
- 'intelligibility_enhancer_no_include_tests_x86': [
- 'debug_bot', 'x86', 'intelligibility_enhancer', 'no_include_tests'
- ],
'bwe_test_logging_x86': [
'debug_bot', 'x86', 'bwe_test_logging'
],
@@ -428,10 +410,6 @@
'debug_bot', 'x86', 'rtti', 'no_sctp'
],
- 'intelligibility_enhancer_no_include_tests_android_arm': [
- 'android', 'debug_static_bot', 'arm',
- 'intelligibility_enhancer', 'no_include_tests'
- ],
'bwe_test_logging_android_arm': [
'android', 'debug_static_bot', 'arm', 'bwe_test_logging'
],
@@ -586,14 +564,6 @@
'gn_args': 'target_cpu="x86"',
},
- 'intelligibility_enhancer': {
- 'gn_args': 'rtc_enable_intelligibility_enhancer=true',
- },
-
- 'no_include_tests': {
- 'gn_args': 'rtc_include_tests=false',
- },
-
'bwe_test_logging': {
'gn_args': 'rtc_enable_bwe_test_logging=true',
},
diff --git a/webrtc.gni b/webrtc.gni
index c43ebd3..409382b 100644
--- a/webrtc.gni
+++ b/webrtc.gni
@@ -75,9 +75,6 @@
# Selects fixed-point code where possible.
rtc_prefer_fixed_point = false
- # Disable the code for the intelligibility enhancer by default.
- rtc_enable_intelligibility_enhancer = false
-
# Enable when an external authentication mechanism is used for performing
# packet authentication for RTP packets instead of libsrtp.
rtc_enable_external_auth = build_with_chromium