Surface the IntelligibilityEnhancer on MediaConstraints

R=henrika@webrtc.org, peah@webrtc.org, tommi@webrtc.org

Review URL: https://codereview.webrtc.org/1952123003 .

Cr-Commit-Position: refs/heads/master@{#12763}
diff --git a/webrtc/api/localaudiosource.cc b/webrtc/api/localaudiosource.cc
index 3b22ad1..9da9fd2 100644
--- a/webrtc/api/localaudiosource.cc
+++ b/webrtc/api/localaudiosource.cc
@@ -47,6 +47,8 @@
        options->noise_suppression},
       {MediaConstraintsInterface::kExperimentalNoiseSuppression,
        options->experimental_ns},
+      {MediaConstraintsInterface::kIntelligibilityEnhancer,
+       options->intelligibility_enhancer},
       {MediaConstraintsInterface::kHighpassFilter, options->highpass_filter},
       {MediaConstraintsInterface::kTypingNoiseDetection,
        options->typing_detection},
diff --git a/webrtc/api/mediaconstraintsinterface.cc b/webrtc/api/mediaconstraintsinterface.cc
index a567870..6a014a2 100644
--- a/webrtc/api/mediaconstraintsinterface.cc
+++ b/webrtc/api/mediaconstraintsinterface.cc
@@ -46,6 +46,8 @@
     "googNoiseSuppression";
 const char MediaConstraintsInterface::kExperimentalNoiseSuppression[] =
     "googNoiseSuppression2";
+const char MediaConstraintsInterface::kIntelligibilityEnhancer[] =
+    "intelligibilityEnhancer";
 const char MediaConstraintsInterface::kHighpassFilter[] =
     "googHighpassFilter";
 const char MediaConstraintsInterface::kTypingNoiseDetection[] =
diff --git a/webrtc/api/mediaconstraintsinterface.h b/webrtc/api/mediaconstraintsinterface.h
index 3db6e26..13560dd 100644
--- a/webrtc/api/mediaconstraintsinterface.h
+++ b/webrtc/api/mediaconstraintsinterface.h
@@ -73,6 +73,7 @@
   static const char kExperimentalAutoGainControl[];  // googAutoGainControl2
   static const char kNoiseSuppression[];  // googNoiseSuppression
   static const char kExperimentalNoiseSuppression[];  // googNoiseSuppression2
+  static const char kIntelligibilityEnhancer[];  // intelligibilityEnhancer
   static const char kHighpassFilter[];  // googHighpassFilter
   static const char kTypingNoiseDetection[];  // googTypingNoiseDetection
   static const char kAudioMirroring[];  // googAudioMirroring
diff --git a/webrtc/media/base/mediachannel.h b/webrtc/media/base/mediachannel.h
index cdbf239..5434709 100644
--- a/webrtc/media/base/mediachannel.h
+++ b/webrtc/media/base/mediachannel.h
@@ -157,6 +157,7 @@
     SetFrom(&extended_filter_aec, change.extended_filter_aec);
     SetFrom(&delay_agnostic_aec, change.delay_agnostic_aec);
     SetFrom(&experimental_ns, change.experimental_ns);
+    SetFrom(&intelligibility_enhancer, change.intelligibility_enhancer);
     SetFrom(&tx_agc_target_dbov, change.tx_agc_target_dbov);
     SetFrom(&tx_agc_digital_compression_gain,
             change.tx_agc_digital_compression_gain);
@@ -181,6 +182,7 @@
         extended_filter_aec == o.extended_filter_aec &&
         delay_agnostic_aec == o.delay_agnostic_aec &&
         experimental_ns == o.experimental_ns &&
+        intelligibility_enhancer == o.intelligibility_enhancer &&
         adjust_agc_delta == o.adjust_agc_delta &&
         tx_agc_target_dbov == o.tx_agc_target_dbov &&
         tx_agc_digital_compression_gain == o.tx_agc_digital_compression_gain &&
@@ -210,6 +212,7 @@
     ost << ToStringIfSet("extended_filter_aec", extended_filter_aec);
     ost << ToStringIfSet("delay_agnostic_aec", delay_agnostic_aec);
     ost << ToStringIfSet("experimental_ns", experimental_ns);
+    ost << ToStringIfSet("intelligibility_enhancer", intelligibility_enhancer);
     ost << ToStringIfSet("tx_agc_target_dbov", tx_agc_target_dbov);
     ost << ToStringIfSet("tx_agc_digital_compression_gain",
         tx_agc_digital_compression_gain);
@@ -244,6 +247,7 @@
   rtc::Optional<bool> extended_filter_aec;
   rtc::Optional<bool> delay_agnostic_aec;
   rtc::Optional<bool> experimental_ns;
+  rtc::Optional<bool> intelligibility_enhancer;
   // Note that tx_agc_* only applies to non-experimental AGC.
   rtc::Optional<uint16_t> tx_agc_target_dbov;
   rtc::Optional<uint16_t> tx_agc_digital_compression_gain;
diff --git a/webrtc/media/engine/webrtcvoiceengine.cc b/webrtc/media/engine/webrtcvoiceengine.cc
index 3ee5eb6..09a072a 100644
--- a/webrtc/media/engine/webrtcvoiceengine.cc
+++ b/webrtc/media/engine/webrtcvoiceengine.cc
@@ -577,6 +577,7 @@
     options.extended_filter_aec = rtc::Optional<bool>(false);
     options.delay_agnostic_aec = rtc::Optional<bool>(false);
     options.experimental_ns = rtc::Optional<bool>(false);
+    options.intelligibility_enhancer = rtc::Optional<bool>(false);
     bool error = ApplyOptions(options);
     RTC_DCHECK(error);
   }
@@ -746,11 +747,20 @@
     }
   }
 
+  if (options.intelligibility_enhancer) {
+    intelligibility_enhancer_ = options.intelligibility_enhancer;
+  }
+  if (intelligibility_enhancer_ && *intelligibility_enhancer_) {
+    LOG(LS_INFO) << "Enabling NS when Intelligibility Enhancer is active.";
+    options.noise_suppression = intelligibility_enhancer_;
+  }
+
   if (options.noise_suppression) {
-    const bool built_in_ns = adm()->BuiltInNSIsAvailable();
-    if (built_in_ns) {
-      if (adm()->EnableBuiltInNS(*options.noise_suppression) == 0 &&
-          *options.noise_suppression) {
+    if (adm()->BuiltInNSIsAvailable()) {
+      bool builtin_ns =
+          *options.noise_suppression &&
+          !(intelligibility_enhancer_ && *intelligibility_enhancer_);
+      if (adm()->EnableBuiltInNS(builtin_ns) == 0 && builtin_ns) {
         // Disable internal software NS if built-in NS is enabled,
         // i.e., replace the software NS with the built-in NS.
         options.noise_suppression = rtc::Optional<bool>(false);
@@ -843,6 +853,13 @@
         new webrtc::ExperimentalNs(*experimental_ns_));
   }
 
+  if (intelligibility_enhancer_) {
+    LOG(LS_INFO) << "Intelligibility Enhancer is enabled? "
+                 << *intelligibility_enhancer_;
+    config.Set<webrtc::Intelligibility>(
+        new webrtc::Intelligibility(*intelligibility_enhancer_));
+  }
+
   // We check audioproc for the benefit of tests, since FakeWebRtcVoiceEngine
   // returns NULL on audio_processing().
   webrtc::AudioProcessing* audioproc = voe_wrapper_->base()->audio_processing();
diff --git a/webrtc/media/engine/webrtcvoiceengine.h b/webrtc/media/engine/webrtcvoiceengine.h
index 162abd1..5b43534 100644
--- a/webrtc/media/engine/webrtcvoiceengine.h
+++ b/webrtc/media/engine/webrtcvoiceengine.h
@@ -121,13 +121,14 @@
   bool is_dumping_aec_ = false;
 
   webrtc::AgcConfig default_agc_config_;
-  // Cache received extended_filter_aec, delay_agnostic_aec and experimental_ns
-  // values, and apply them in case they are missing in the audio options. We
-  // need to do this because SetExtraOptions() will revert to defaults for
-  // options which are not provided.
+  // Cache received extended_filter_aec, delay_agnostic_aec, experimental_ns and
+  // intelligibility_enhancer values, and apply them in case they are missing
+  // in the audio options. We need to do this because SetExtraOptions() will
+  // revert to defaults for options which are not provided.
   rtc::Optional<bool> extended_filter_aec_;
   rtc::Optional<bool> delay_agnostic_aec_;
   rtc::Optional<bool> experimental_ns_;
+  rtc::Optional<bool> intelligibility_enhancer_;
 
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(WebRtcVoiceEngine);
 };
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index d89cc33..e75b328 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -163,12 +163,10 @@
       private_submodules_(new ApmPrivateSubmodules(beamformer)),
       constants_(config.Get<ExperimentalAgc>().startup_min_volume,
 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
-                 false,
+                 false),
 #else
-                 config.Get<ExperimentalAgc>().enabled,
+                 config.Get<ExperimentalAgc>().enabled),
 #endif
-                 config.Get<Intelligibility>().enabled),
-
 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
       capture_(false,
 #else
@@ -176,7 +174,8 @@
 #endif
                config.Get<Beamforming>().array_geometry,
                config.Get<Beamforming>().target_direction),
-      capture_nonlocked_(config.Get<Beamforming>().enabled)
+      capture_nonlocked_(config.Get<Beamforming>().enabled,
+                         config.Get<Intelligibility>().enabled)
 {
   {
     rtc::CritScope cs_render(&crit_render_);
@@ -411,6 +410,13 @@
     InitializeTransient();
   }
 
+  if (capture_nonlocked_.intelligibility_enabled !=
+      config.Get<Intelligibility>().enabled) {
+    capture_nonlocked_.intelligibility_enabled =
+        config.Get<Intelligibility>().enabled;
+    InitializeIntelligibility();
+  }
+
 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
   if (capture_nonlocked_.beamformer_enabled !=
           config.Get<Beamforming>().enabled) {
@@ -704,12 +710,13 @@
     ca->CopyLowPassToReference();
   }
   public_submodules_->noise_suppression->ProcessCaptureAudio(ca);
-  if (constants_.intelligibility_enabled) {
+  if (capture_nonlocked_.intelligibility_enabled) {
     RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());
-    RTC_DCHECK(public_submodules_->gain_control->is_enabled());
+    int gain_db = public_submodules_->gain_control->is_enabled() ?
+                  public_submodules_->gain_control->compression_gain_db() :
+                  0;
     public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate(
-        public_submodules_->noise_suppression->NoiseEstimate(),
-        public_submodules_->gain_control->compression_gain_db());
+        public_submodules_->noise_suppression->NoiseEstimate(), gain_db);
   }
 
   // Ensure that the stream delay was set before the call to the
@@ -902,7 +909,7 @@
     ra->SplitIntoFrequencyBands();
   }
 
-  if (constants_.intelligibility_enabled) {
+  if (capture_nonlocked_.intelligibility_enabled) {
     public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
         ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
         ra->num_channels());
@@ -1150,7 +1157,7 @@
 }
 
 bool AudioProcessingImpl::is_rev_processed() const {
-  return constants_.intelligibility_enabled;
+  return capture_nonlocked_.intelligibility_enabled;
 }
 
 bool AudioProcessingImpl::rev_synthesis_needed() const {
@@ -1215,7 +1222,7 @@
 }
 
 void AudioProcessingImpl::InitializeIntelligibility() {
-  if (constants_.intelligibility_enabled) {
+  if (capture_nonlocked_.intelligibility_enabled) {
     public_submodules_->intelligibility_enhancer.reset(
         new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
                                     render_.render_audio->num_channels(),
@@ -1442,6 +1449,8 @@
 
   config.set_transient_suppression_enabled(
       capture_.transient_suppressor_enabled);
+  config.set_intelligibility_enhancer_enabled(
+      capture_nonlocked_.intelligibility_enabled);
 
   std::string experiments_description =
       public_submodules_->echo_cancellation->GetExperimentsDescription();
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 7323b85..04ddabd 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -276,16 +276,12 @@
 
   // APM constants.
   const struct ApmConstants {
-    ApmConstants(int agc_startup_min_volume,
-                 bool use_experimental_agc,
-                 bool intelligibility_enabled)
+    ApmConstants(int agc_startup_min_volume, bool use_experimental_agc)
         :  // Format of processing streams at input/output call sites.
           agc_startup_min_volume(agc_startup_min_volume),
-          use_experimental_agc(use_experimental_agc),
-          intelligibility_enabled(intelligibility_enabled) {}
+          use_experimental_agc(use_experimental_agc) {}
     int agc_startup_min_volume;
     bool use_experimental_agc;
-    bool intelligibility_enabled;
   } constants_;
 
   struct ApmCaptureState {
@@ -325,11 +321,13 @@
   } capture_ GUARDED_BY(crit_capture_);
 
   struct ApmCaptureNonLockedState {
-    ApmCaptureNonLockedState(bool beamformer_enabled)
+    ApmCaptureNonLockedState(bool beamformer_enabled,
+                             bool intelligibility_enabled)
         : fwd_proc_format(kSampleRate16kHz),
           split_rate(kSampleRate16kHz),
           stream_delay_ms(0),
-          beamformer_enabled(beamformer_enabled) {}
+          beamformer_enabled(beamformer_enabled),
+          intelligibility_enabled(intelligibility_enabled) {}
     // Only the rate and samples fields of fwd_proc_format_ are used because the
     // forward processing number of channels is mutable and is tracked by the
     // capture_audio_.
@@ -337,6 +335,7 @@
     int split_rate;
     int stream_delay_ms;
     bool beamformer_enabled;
+    bool intelligibility_enabled;
   } capture_nonlocked_;
 
   struct ApmRenderState {
diff --git a/webrtc/modules/audio_processing/debug.proto b/webrtc/modules/audio_processing/debug.proto
index f796744..4417773 100644
--- a/webrtc/modules/audio_processing/debug.proto
+++ b/webrtc/modules/audio_processing/debug.proto
@@ -46,7 +46,7 @@
 // Contains the configurations of various APM component. A Config message is
 // added when any of the fields are changed.
 message Config {
-  // Next field number 18.
+  // Next field number 19.
   // Acoustic echo canceler.
   optional bool aec_enabled = 1;
   optional bool aec_delay_agnostic_enabled = 2;
@@ -72,6 +72,8 @@
   // Semicolon-separated string containing experimental feature
   // descriptions.
   optional string experiments_description = 17;
+  // Intelligibility Enhancer.
+  optional bool intelligibility_enhancer_enabled = 18;
 }
 
 message Event {
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 24be9de..2f8e48f 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -160,9 +160,7 @@
   const SphericalPointf target_direction;
 };
 
-// Use to enable intelligibility enhancer in audio processing. Must be provided
-// though the constructor. It will have no impact if used with
-// AudioProcessing::SetExtraOptions().
+// Use to enable intelligibility enhancer in audio processing.
 //
 // Note: If enabled and the reverse stream has more than one output channel,
 // the reverse stream will become an upmixed mono signal.
diff --git a/webrtc/modules/audio_processing/test/debug_dump_replayer.cc b/webrtc/modules/audio_processing/test/debug_dump_replayer.cc
index fc127e6..fa76747 100644
--- a/webrtc/modules/audio_processing/test/debug_dump_replayer.cc
+++ b/webrtc/modules/audio_processing/test/debug_dump_replayer.cc
@@ -187,6 +187,10 @@
   config.Set<ExtendedFilter>(
       new ExtendedFilter(msg.aec_extended_filter_enabled()));
 
+  RTC_CHECK(msg.has_intelligibility_enhancer_enabled());
+  config.Set<Intelligibility>(
+      new Intelligibility(msg.intelligibility_enhancer_enabled()));
+
   // We only create APM once, since changes on these fields should not
   // happen in current implementation.
   if (!apm_.get()) {
diff --git a/webrtc/modules/audio_processing/test/unpack.cc b/webrtc/modules/audio_processing/test/unpack.cc
index fbb8e85..f5c0700 100644
--- a/webrtc/modules/audio_processing/test/unpack.cc
+++ b/webrtc/modules/audio_processing/test/unpack.cc
@@ -252,6 +252,7 @@
       PRINT_CONFIG(ns_enabled);
       PRINT_CONFIG(ns_level);
       PRINT_CONFIG(transient_suppression_enabled);
+      PRINT_CONFIG(intelligibility_enhancer_enabled);
       if (msg.has_experiments_description()) {
         fprintf(settings_file, "  experiments_description: %s\n",
                 msg.experiments_description().c_str());