Surface the IntelligibilityEnhancer on MediaConstraints
R=henrika@webrtc.org, peah@webrtc.org, tommi@webrtc.org
Review URL: https://codereview.webrtc.org/1952123003 .
Cr-Commit-Position: refs/heads/master@{#12763}
diff --git a/webrtc/api/localaudiosource.cc b/webrtc/api/localaudiosource.cc
index 3b22ad1..9da9fd2 100644
--- a/webrtc/api/localaudiosource.cc
+++ b/webrtc/api/localaudiosource.cc
@@ -47,6 +47,8 @@
options->noise_suppression},
{MediaConstraintsInterface::kExperimentalNoiseSuppression,
options->experimental_ns},
+ {MediaConstraintsInterface::kIntelligibilityEnhancer,
+ options->intelligibility_enhancer},
{MediaConstraintsInterface::kHighpassFilter, options->highpass_filter},
{MediaConstraintsInterface::kTypingNoiseDetection,
options->typing_detection},
diff --git a/webrtc/api/mediaconstraintsinterface.cc b/webrtc/api/mediaconstraintsinterface.cc
index a567870..6a014a2 100644
--- a/webrtc/api/mediaconstraintsinterface.cc
+++ b/webrtc/api/mediaconstraintsinterface.cc
@@ -46,6 +46,8 @@
"googNoiseSuppression";
const char MediaConstraintsInterface::kExperimentalNoiseSuppression[] =
"googNoiseSuppression2";
+const char MediaConstraintsInterface::kIntelligibilityEnhancer[] =
+ "intelligibilityEnhancer";
const char MediaConstraintsInterface::kHighpassFilter[] =
"googHighpassFilter";
const char MediaConstraintsInterface::kTypingNoiseDetection[] =
diff --git a/webrtc/api/mediaconstraintsinterface.h b/webrtc/api/mediaconstraintsinterface.h
index 3db6e26..13560dd 100644
--- a/webrtc/api/mediaconstraintsinterface.h
+++ b/webrtc/api/mediaconstraintsinterface.h
@@ -73,6 +73,7 @@
static const char kExperimentalAutoGainControl[]; // googAutoGainControl2
static const char kNoiseSuppression[]; // googNoiseSuppression
static const char kExperimentalNoiseSuppression[]; // googNoiseSuppression2
+ static const char kIntelligibilityEnhancer[]; // intelligibilityEnhancer
static const char kHighpassFilter[]; // googHighpassFilter
static const char kTypingNoiseDetection[]; // googTypingNoiseDetection
static const char kAudioMirroring[]; // googAudioMirroring
diff --git a/webrtc/media/base/mediachannel.h b/webrtc/media/base/mediachannel.h
index cdbf239..5434709 100644
--- a/webrtc/media/base/mediachannel.h
+++ b/webrtc/media/base/mediachannel.h
@@ -157,6 +157,7 @@
SetFrom(&extended_filter_aec, change.extended_filter_aec);
SetFrom(&delay_agnostic_aec, change.delay_agnostic_aec);
SetFrom(&experimental_ns, change.experimental_ns);
+ SetFrom(&intelligibility_enhancer, change.intelligibility_enhancer);
SetFrom(&tx_agc_target_dbov, change.tx_agc_target_dbov);
SetFrom(&tx_agc_digital_compression_gain,
change.tx_agc_digital_compression_gain);
@@ -181,6 +182,7 @@
extended_filter_aec == o.extended_filter_aec &&
delay_agnostic_aec == o.delay_agnostic_aec &&
experimental_ns == o.experimental_ns &&
+ intelligibility_enhancer == o.intelligibility_enhancer &&
adjust_agc_delta == o.adjust_agc_delta &&
tx_agc_target_dbov == o.tx_agc_target_dbov &&
tx_agc_digital_compression_gain == o.tx_agc_digital_compression_gain &&
@@ -210,6 +212,7 @@
ost << ToStringIfSet("extended_filter_aec", extended_filter_aec);
ost << ToStringIfSet("delay_agnostic_aec", delay_agnostic_aec);
ost << ToStringIfSet("experimental_ns", experimental_ns);
+ ost << ToStringIfSet("intelligibility_enhancer", intelligibility_enhancer);
ost << ToStringIfSet("tx_agc_target_dbov", tx_agc_target_dbov);
ost << ToStringIfSet("tx_agc_digital_compression_gain",
tx_agc_digital_compression_gain);
@@ -244,6 +247,7 @@
rtc::Optional<bool> extended_filter_aec;
rtc::Optional<bool> delay_agnostic_aec;
rtc::Optional<bool> experimental_ns;
+ rtc::Optional<bool> intelligibility_enhancer;
// Note that tx_agc_* only applies to non-experimental AGC.
rtc::Optional<uint16_t> tx_agc_target_dbov;
rtc::Optional<uint16_t> tx_agc_digital_compression_gain;
diff --git a/webrtc/media/engine/webrtcvoiceengine.cc b/webrtc/media/engine/webrtcvoiceengine.cc
index 3ee5eb6..09a072a 100644
--- a/webrtc/media/engine/webrtcvoiceengine.cc
+++ b/webrtc/media/engine/webrtcvoiceengine.cc
@@ -577,6 +577,7 @@
options.extended_filter_aec = rtc::Optional<bool>(false);
options.delay_agnostic_aec = rtc::Optional<bool>(false);
options.experimental_ns = rtc::Optional<bool>(false);
+ options.intelligibility_enhancer = rtc::Optional<bool>(false);
bool error = ApplyOptions(options);
RTC_DCHECK(error);
}
@@ -746,11 +747,20 @@
}
}
+ if (options.intelligibility_enhancer) {
+ intelligibility_enhancer_ = options.intelligibility_enhancer;
+ }
+ if (intelligibility_enhancer_ && *intelligibility_enhancer_) {
+ LOG(LS_INFO) << "Enabling NS when Intelligibility Enhancer is active.";
+ options.noise_suppression = intelligibility_enhancer_;
+ }
+
if (options.noise_suppression) {
- const bool built_in_ns = adm()->BuiltInNSIsAvailable();
- if (built_in_ns) {
- if (adm()->EnableBuiltInNS(*options.noise_suppression) == 0 &&
- *options.noise_suppression) {
+ if (adm()->BuiltInNSIsAvailable()) {
+ bool builtin_ns =
+ *options.noise_suppression &&
+ !(intelligibility_enhancer_ && *intelligibility_enhancer_);
+ if (adm()->EnableBuiltInNS(builtin_ns) == 0 && builtin_ns) {
// Disable internal software NS if built-in NS is enabled,
// i.e., replace the software NS with the built-in NS.
options.noise_suppression = rtc::Optional<bool>(false);
@@ -843,6 +853,13 @@
new webrtc::ExperimentalNs(*experimental_ns_));
}
+ if (intelligibility_enhancer_) {
+ LOG(LS_INFO) << "Intelligibility Enhancer is enabled? "
+ << *intelligibility_enhancer_;
+ config.Set<webrtc::Intelligibility>(
+ new webrtc::Intelligibility(*intelligibility_enhancer_));
+ }
+
// We check audioproc for the benefit of tests, since FakeWebRtcVoiceEngine
// returns NULL on audio_processing().
webrtc::AudioProcessing* audioproc = voe_wrapper_->base()->audio_processing();
diff --git a/webrtc/media/engine/webrtcvoiceengine.h b/webrtc/media/engine/webrtcvoiceengine.h
index 162abd1..5b43534 100644
--- a/webrtc/media/engine/webrtcvoiceengine.h
+++ b/webrtc/media/engine/webrtcvoiceengine.h
@@ -121,13 +121,14 @@
bool is_dumping_aec_ = false;
webrtc::AgcConfig default_agc_config_;
- // Cache received extended_filter_aec, delay_agnostic_aec and experimental_ns
- // values, and apply them in case they are missing in the audio options. We
- // need to do this because SetExtraOptions() will revert to defaults for
- // options which are not provided.
+ // Cache received extended_filter_aec, delay_agnostic_aec, experimental_ns and
+ // intelligibility_enhancer values, and apply them in case they are missing
+ // in the audio options. We need to do this because SetExtraOptions() will
+ // revert to defaults for options which are not provided.
rtc::Optional<bool> extended_filter_aec_;
rtc::Optional<bool> delay_agnostic_aec_;
rtc::Optional<bool> experimental_ns_;
+ rtc::Optional<bool> intelligibility_enhancer_;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(WebRtcVoiceEngine);
};
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index d89cc33..e75b328 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -163,12 +163,10 @@
private_submodules_(new ApmPrivateSubmodules(beamformer)),
constants_(config.Get<ExperimentalAgc>().startup_min_volume,
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
- false,
+ false),
#else
- config.Get<ExperimentalAgc>().enabled,
+ config.Get<ExperimentalAgc>().enabled),
#endif
- config.Get<Intelligibility>().enabled),
-
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
capture_(false,
#else
@@ -176,7 +174,8 @@
#endif
config.Get<Beamforming>().array_geometry,
config.Get<Beamforming>().target_direction),
- capture_nonlocked_(config.Get<Beamforming>().enabled)
+ capture_nonlocked_(config.Get<Beamforming>().enabled,
+ config.Get<Intelligibility>().enabled)
{
{
rtc::CritScope cs_render(&crit_render_);
@@ -411,6 +410,13 @@
InitializeTransient();
}
+ if(capture_nonlocked_.intelligibility_enabled !=
+ config.Get<Intelligibility>().enabled) {
+ capture_nonlocked_.intelligibility_enabled =
+ config.Get<Intelligibility>().enabled;
+ InitializeIntelligibility();
+ }
+
#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
if (capture_nonlocked_.beamformer_enabled !=
config.Get<Beamforming>().enabled) {
@@ -704,12 +710,13 @@
ca->CopyLowPassToReference();
}
public_submodules_->noise_suppression->ProcessCaptureAudio(ca);
- if (constants_.intelligibility_enabled) {
+ if (capture_nonlocked_.intelligibility_enabled) {
RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());
- RTC_DCHECK(public_submodules_->gain_control->is_enabled());
+ int gain_db = public_submodules_->gain_control->is_enabled() ?
+ public_submodules_->gain_control->compression_gain_db() :
+ 0;
public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(
- public_submodules_->noise_suppression->NoiseEstimate(),
- public_submodules_->gain_control->compression_gain_db());
+ public_submodules_->noise_suppression->NoiseEstimate(), gain_db);
}
// Ensure that the stream delay was set before the call to the
@@ -902,7 +909,7 @@
ra->SplitIntoFrequencyBands();
}
- if (constants_.intelligibility_enabled) {
+ if (capture_nonlocked_.intelligibility_enabled) {
public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
ra->num_channels());
@@ -1150,7 +1157,7 @@
}
bool AudioProcessingImpl::is_rev_processed() const {
- return constants_.intelligibility_enabled;
+ return capture_nonlocked_.intelligibility_enabled;
}
bool AudioProcessingImpl::rev_synthesis_needed() const {
@@ -1215,7 +1222,7 @@
}
void AudioProcessingImpl::InitializeIntelligibility() {
- if (constants_.intelligibility_enabled) {
+ if (capture_nonlocked_.intelligibility_enabled) {
public_submodules_->intelligibility_enhancer.reset(
new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
render_.render_audio->num_channels(),
@@ -1442,6 +1449,8 @@
config.set_transient_suppression_enabled(
capture_.transient_suppressor_enabled);
+ config.set_intelligibility_enhancer_enabled(
+ capture_nonlocked_.intelligibility_enabled);
std::string experiments_description =
public_submodules_->echo_cancellation->GetExperimentsDescription();
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 7323b85..04ddabd 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -276,16 +276,12 @@
// APM constants.
const struct ApmConstants {
- ApmConstants(int agc_startup_min_volume,
- bool use_experimental_agc,
- bool intelligibility_enabled)
+ ApmConstants(int agc_startup_min_volume, bool use_experimental_agc)
: // Format of processing streams at input/output call sites.
agc_startup_min_volume(agc_startup_min_volume),
- use_experimental_agc(use_experimental_agc),
- intelligibility_enabled(intelligibility_enabled) {}
+ use_experimental_agc(use_experimental_agc) {}
int agc_startup_min_volume;
bool use_experimental_agc;
- bool intelligibility_enabled;
} constants_;
struct ApmCaptureState {
@@ -325,11 +321,13 @@
} capture_ GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {
- ApmCaptureNonLockedState(bool beamformer_enabled)
+ ApmCaptureNonLockedState(bool beamformer_enabled,
+ bool intelligibility_enabled)
: fwd_proc_format(kSampleRate16kHz),
split_rate(kSampleRate16kHz),
stream_delay_ms(0),
- beamformer_enabled(beamformer_enabled) {}
+ beamformer_enabled(beamformer_enabled),
+ intelligibility_enabled(intelligibility_enabled) {}
// Only the rate and samples fields of fwd_proc_format_ are used because the
// forward processing number of channels is mutable and is tracked by the
// capture_audio_.
@@ -337,6 +335,7 @@
int split_rate;
int stream_delay_ms;
bool beamformer_enabled;
+ bool intelligibility_enabled;
} capture_nonlocked_;
struct ApmRenderState {
diff --git a/webrtc/modules/audio_processing/debug.proto b/webrtc/modules/audio_processing/debug.proto
index f796744..4417773 100644
--- a/webrtc/modules/audio_processing/debug.proto
+++ b/webrtc/modules/audio_processing/debug.proto
@@ -46,7 +46,7 @@
// Contains the configurations of various APM component. A Config message is
// added when any of the fields are changed.
message Config {
- // Next field number 18.
+ // Next field number 19.
// Acoustic echo canceler.
optional bool aec_enabled = 1;
optional bool aec_delay_agnostic_enabled = 2;
@@ -72,6 +72,8 @@
// Semicolon-separated string containing experimental feature
// descriptions.
optional string experiments_description = 17;
+ // Intelligibility Enhancer.
+ optional bool intelligibility_enhancer_enabled = 18;
}
message Event {
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 24be9de..2f8e48f 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -160,9 +160,7 @@
const SphericalPointf target_direction;
};
-// Use to enable intelligibility enhancer in audio processing. Must be provided
-// though the constructor. It will have no impact if used with
-// AudioProcessing::SetExtraOptions().
+// Use to enable the intelligibility enhancer in audio processing.
//
// Note: If enabled and the reverse stream has more than one output channel,
// the reverse stream will become an upmixed mono signal.
diff --git a/webrtc/modules/audio_processing/test/debug_dump_replayer.cc b/webrtc/modules/audio_processing/test/debug_dump_replayer.cc
index fc127e6..fa76747 100644
--- a/webrtc/modules/audio_processing/test/debug_dump_replayer.cc
+++ b/webrtc/modules/audio_processing/test/debug_dump_replayer.cc
@@ -187,6 +187,10 @@
config.Set<ExtendedFilter>(
new ExtendedFilter(msg.aec_extended_filter_enabled()));
+ RTC_CHECK(msg.has_intelligibility_enhancer_enabled());
+ config.Set<Intelligibility>(
+ new Intelligibility(msg.intelligibility_enhancer_enabled()));
+
// We only create APM once, since changes on these fields should not
// happen in current implementation.
if (!apm_.get()) {
diff --git a/webrtc/modules/audio_processing/test/unpack.cc b/webrtc/modules/audio_processing/test/unpack.cc
index fbb8e85..f5c0700 100644
--- a/webrtc/modules/audio_processing/test/unpack.cc
+++ b/webrtc/modules/audio_processing/test/unpack.cc
@@ -252,6 +252,7 @@
PRINT_CONFIG(ns_enabled);
PRINT_CONFIG(ns_level);
PRINT_CONFIG(transient_suppression_enabled);
+ PRINT_CONFIG(intelligibility_enhancer_enabled);
if (msg.has_experiments_description()) {
fprintf(settings_file, " experiments_description: %s\n",
msg.experiments_description().c_str());