Integrate Intelligibility with APM

- Integrates intelligibility into audio_processing.
    - Allows modification of the reverse stream when intelligibility is
      enabled (see the usage sketch after this list).
- Makes intelligibility available in audioproc_float test.
    - Adds reverse stream processing to audioproc_float.
- (removed) Makes intelligibility toggleable in real time in voe_cmd_test.
- Cleans up intelligibility construction, parameters, constants, and dead code.
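
Usage sketch (illustrative, not part of the patch): enabling the enhancer
through the APM config and pushing far-end ("reverse") audio through the new
processing path. The Intelligibility config option and the
ProcessReverseStream() overloads come from this CL; the function name, frame
setup and ownership handling below are assumed boilerplate, and Intelligibility
is assumed to follow the usual bool-enabled config-struct pattern.

  #include "webrtc/modules/audio_processing/include/audio_processing.h"
  #include "webrtc/modules/interface/module_common_types.h"  // AudioFrame

  // Hypothetical caller; names are illustrative.
  void RunIntelligibilityExample() {
    // Enable the intelligibility enhancer via the APM config.
    webrtc::Config config;
    config.Set<webrtc::Intelligibility>(new webrtc::Intelligibility(true));
    webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create(config);

    webrtc::AudioFrame render_frame;   // Far-end audio, enhanced in place.
    webrtc::AudioFrame capture_frame;  // Near-end audio, used as noise reference.
    // ... fill both frames (rate, channels, samples) from the audio device ...

    // The reverse stream is now processed rather than only analyzed, so any
    // modification by the enhancer is written back into |render_frame|.
    apm->ProcessReverseStream(&render_frame);
    apm->ProcessStream(&capture_frame);

    delete apm;
  }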

TBR=pbos@webrtc.org

Review URL: https://codereview.webrtc.org/1234463003

Cr-Commit-Position: refs/heads/master@{#9713}
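
Also new in this CL (see the diff below): a float-pointer ProcessReverseStream()
overload taking separate reverse input/output stream configs and a destination
buffer, so a converted or enhanced far-end signal can be handed back to the
playout path. A minimal sketch with illustrative rates and buffer sizes,
reusing |apm| from the sketch above:

  // 10 ms of mono far-end audio at 16 kHz (160 samples per channel).
  const int kNumFrames = 160;
  float render_in[kNumFrames] = {0.f};
  float render_out[kNumFrames] = {0.f};
  const float* const src[] = {render_in};
  float* const dest[] = {render_out};

  webrtc::StreamConfig reverse_input(16000 /* sample_rate_hz */, 1 /* channels */);
  webrtc::StreamConfig reverse_output(16000, 1);

  // If the enhancer is active, the modified audio lands in |dest|; if only a
  // format conversion is needed, the input is converted; otherwise it is
  // copied through unchanged.
  int err = apm->ProcessReverseStream(src, reverse_input, reverse_output, dest);
  if (err != webrtc::AudioProcessing::kNoError) {
    // Handle error.
  }
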
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 81d6c70..c9e4ddc 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -15,8 +15,9 @@
 
 #include "webrtc/base/checks.h"
 #include "webrtc/base/platform_file.h"
-#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/common_audio/audio_converter.h"
 #include "webrtc/common_audio/channel_buffer.h"
+#include "webrtc/common_audio/include/audio_util.h"
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 extern "C" {
 #include "webrtc/modules/audio_processing/aec/aec_core.h"
@@ -29,6 +30,7 @@
 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
 #include "webrtc/modules/audio_processing/gain_control_impl.h"
 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
+#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
 #include "webrtc/modules/audio_processing/level_estimator_impl.h"
 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
 #include "webrtc/modules/audio_processing/processing_component.h"
@@ -184,6 +186,7 @@
 #endif
       api_format_({{{kSampleRate16kHz, 1, false},
                     {kSampleRate16kHz, 1, false},
+                    {kSampleRate16kHz, 1, false},
                     {kSampleRate16kHz, 1, false}}}),
       fwd_proc_format_(kSampleRate16kHz),
       rev_proc_format_(kSampleRate16kHz, 1),
@@ -210,7 +213,8 @@
 #endif
       beamformer_enabled_(config.Get<Beamforming>().enabled),
       beamformer_(beamformer),
-      array_geometry_(config.Get<Beamforming>().array_geometry) {
+      array_geometry_(config.Get<Beamforming>().array_geometry),
+      intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
   echo_cancellation_ = new EchoCancellationImpl(this, crit_);
   component_list_.push_back(echo_cancellation_);
 
@@ -282,11 +286,17 @@
                                     ChannelLayout output_layout,
                                     ChannelLayout reverse_layout) {
   const ProcessingConfig processing_config = {
-      {{input_sample_rate_hz, ChannelsFromLayout(input_layout),
+      {{input_sample_rate_hz,
+        ChannelsFromLayout(input_layout),
         LayoutHasKeyboard(input_layout)},
-       {output_sample_rate_hz, ChannelsFromLayout(output_layout),
+       {output_sample_rate_hz,
+        ChannelsFromLayout(output_layout),
         LayoutHasKeyboard(output_layout)},
-       {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout),
+       {reverse_sample_rate_hz,
+        ChannelsFromLayout(reverse_layout),
+        LayoutHasKeyboard(reverse_layout)},
+       {reverse_sample_rate_hz,
+        ChannelsFromLayout(reverse_layout),
         LayoutHasKeyboard(reverse_layout)}}};
 
   return Initialize(processing_config);
@@ -301,14 +311,28 @@
   const int fwd_audio_buffer_channels =
       beamformer_enabled_ ? api_format_.input_stream().num_channels()
                           : api_format_.output_stream().num_channels();
-  if (api_format_.reverse_stream().num_channels() > 0) {
+  const int rev_audio_buffer_out_num_frames =
+      api_format_.reverse_output_stream().num_frames() == 0
+          ? rev_proc_format_.num_frames()
+          : api_format_.reverse_output_stream().num_frames();
+  if (api_format_.reverse_input_stream().num_channels() > 0) {
     render_audio_.reset(new AudioBuffer(
-        api_format_.reverse_stream().num_frames(),
-        api_format_.reverse_stream().num_channels(),
+        api_format_.reverse_input_stream().num_frames(),
+        api_format_.reverse_input_stream().num_channels(),
         rev_proc_format_.num_frames(), rev_proc_format_.num_channels(),
-        rev_proc_format_.num_frames()));
+        rev_audio_buffer_out_num_frames));
+    if (rev_conversion_needed()) {
+      render_converter_ = AudioConverter::Create(
+          api_format_.reverse_input_stream().num_channels(),
+          api_format_.reverse_input_stream().num_frames(),
+          api_format_.reverse_output_stream().num_channels(),
+          api_format_.reverse_output_stream().num_frames());
+    } else {
+      render_converter_.reset(nullptr);
+    }
   } else {
     render_audio_.reset(nullptr);
+    render_converter_.reset(nullptr);
   }
   capture_audio_.reset(new AudioBuffer(
       api_format_.input_stream().num_frames(),
@@ -329,6 +353,8 @@
 
   InitializeBeamformer();
 
+  InitializeIntelligibility();
+
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
     int err = WriteInitMessage();
@@ -396,7 +422,8 @@
     // ...the forward stream is at 8 kHz.
     rev_proc_rate = kSampleRate8kHz;
   } else {
-    if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) {
+    if (api_format_.reverse_input_stream().sample_rate_hz() ==
+        kSampleRate32kHz) {
       // ...or the input is at 32 kHz, in which case we use the splitting
       // filter rather than the resampler.
       rev_proc_rate = kSampleRate32kHz;
@@ -624,6 +651,7 @@
   MaybeUpdateHistograms();
 
   AudioBuffer* ca = capture_audio_.get();  // For brevity.
+
   if (use_new_agc_ && gain_control_->is_enabled()) {
     agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(),
                                     fwd_proc_format_.num_frames());
@@ -634,6 +662,11 @@
     ca->SplitIntoFrequencyBands();
   }
 
+  if (intelligibility_enabled_) {
+    intelligibility_enhancer_->AnalyzeCaptureAudio(
+        ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels());
+  }
+
   if (beamformer_enabled_) {
     beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f());
     ca->set_num_channels(1);
@@ -684,50 +717,81 @@
 
 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
                                               int samples_per_channel,
-                                              int sample_rate_hz,
+                                              int rev_sample_rate_hz,
                                               ChannelLayout layout) {
   const StreamConfig reverse_config = {
-      sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
+      rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
   };
   if (samples_per_channel != reverse_config.num_frames()) {
     return kBadDataLengthError;
   }
-  return AnalyzeReverseStream(data, reverse_config);
+  return AnalyzeReverseStream(data, reverse_config, reverse_config);
+}
+
+int AudioProcessingImpl::ProcessReverseStream(
+    const float* const* src,
+    const StreamConfig& reverse_input_config,
+    const StreamConfig& reverse_output_config,
+    float* const* dest) {
+  RETURN_ON_ERR(
+      AnalyzeReverseStream(src, reverse_input_config, reverse_output_config));
+  if (is_rev_processed()) {
+    render_audio_->CopyTo(api_format_.reverse_output_stream(), dest);
+  } else if (rev_conversion_needed()) {
+    render_converter_->Convert(src, reverse_input_config.num_samples(), dest,
+                               reverse_output_config.num_samples());
+  } else {
+    CopyAudioIfNeeded(src, reverse_input_config.num_frames(),
+                      reverse_input_config.num_channels(), dest);
+  }
+
+  return kNoError;
 }
 
 int AudioProcessingImpl::AnalyzeReverseStream(
-    const float* const* data,
-    const StreamConfig& reverse_config) {
+    const float* const* src,
+    const StreamConfig& reverse_input_config,
+    const StreamConfig& reverse_output_config) {
   CriticalSectionScoped crit_scoped(crit_);
-  if (data == NULL) {
+  if (src == NULL) {
     return kNullPointerError;
   }
 
-  if (reverse_config.num_channels() <= 0) {
+  if (reverse_input_config.num_channels() <= 0) {
     return kBadNumberChannelsError;
   }
 
   ProcessingConfig processing_config = api_format_;
-  processing_config.reverse_stream() = reverse_config;
+  processing_config.reverse_input_stream() = reverse_input_config;
+  processing_config.reverse_output_stream() = reverse_output_config;
 
   RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
-  assert(reverse_config.num_frames() ==
-         api_format_.reverse_stream().num_frames());
+  assert(reverse_input_config.num_frames() ==
+         api_format_.reverse_input_stream().num_frames());
 
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
     event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
     audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
     const size_t channel_size =
-        sizeof(float) * api_format_.reverse_stream().num_frames();
-    for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i)
-      msg->add_channel(data[i], channel_size);
+        sizeof(float) * api_format_.reverse_input_stream().num_frames();
+    for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i)
+      msg->add_channel(src[i], channel_size);
     RETURN_ON_ERR(WriteMessageToDebugFile());
   }
 #endif
 
-  render_audio_->CopyFrom(data, api_format_.reverse_stream());
-  return AnalyzeReverseStreamLocked();
+  render_audio_->CopyFrom(src, api_format_.reverse_input_stream());
+  return ProcessReverseStreamLocked();
+}
+
+int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
+  RETURN_ON_ERR(AnalyzeReverseStream(frame));
+  if (is_rev_processed()) {
+    render_audio_->InterleaveTo(frame, true);
+  }
+
+  return kNoError;
 }
 
 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
@@ -752,12 +816,18 @@
   }
 
   ProcessingConfig processing_config = api_format_;
-  processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_);
-  processing_config.reverse_stream().set_num_channels(frame->num_channels_);
+  processing_config.reverse_input_stream().set_sample_rate_hz(
+      frame->sample_rate_hz_);
+  processing_config.reverse_input_stream().set_num_channels(
+      frame->num_channels_);
+  processing_config.reverse_output_stream().set_sample_rate_hz(
+      frame->sample_rate_hz_);
+  processing_config.reverse_output_stream().set_num_channels(
+      frame->num_channels_);
 
   RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
   if (frame->samples_per_channel_ !=
-      api_format_.reverse_stream().num_frames()) {
+      api_format_.reverse_input_stream().num_frames()) {
     return kBadDataLengthError;
   }
 
@@ -771,23 +841,32 @@
     RETURN_ON_ERR(WriteMessageToDebugFile());
   }
 #endif
-
   render_audio_->DeinterleaveFrom(frame);
-  return AnalyzeReverseStreamLocked();
+  return ProcessReverseStreamLocked();
 }
 
-int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
+int AudioProcessingImpl::ProcessReverseStreamLocked() {
   AudioBuffer* ra = render_audio_.get();  // For brevity.
   if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) {
     ra->SplitIntoFrequencyBands();
   }
 
+  if (intelligibility_enabled_) {
+    intelligibility_enhancer_->ProcessRenderAudio(
+        ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels());
+  }
+
   RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
   RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
   if (!use_new_agc_) {
     RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
   }
 
+  if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz &&
+      is_rev_processed()) {
+    ra->MergeFrequencyBands();
+  }
+
   return kNoError;
 }
 
@@ -1004,6 +1083,15 @@
   return false;
 }
 
+bool AudioProcessingImpl::is_rev_processed() const {
+  return intelligibility_enabled_ && intelligibility_enhancer_->active();
+}
+
+bool AudioProcessingImpl::rev_conversion_needed() const {
+  return (api_format_.reverse_input_stream() !=
+          api_format_.reverse_output_stream());
+}
+
 void AudioProcessingImpl::InitializeExperimentalAgc() {
   if (use_new_agc_) {
     if (!agc_manager_.get()) {
@@ -1036,6 +1124,16 @@
   }
 }
 
+void AudioProcessingImpl::InitializeIntelligibility() {
+  if (intelligibility_enabled_) {
+    IntelligibilityEnhancer::Config config;
+    config.sample_rate_hz = split_rate_;
+    config.num_capture_channels = capture_audio_->num_channels();
+    config.num_render_channels = render_audio_->num_channels();
+    intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));
+  }
+}
+
 void AudioProcessingImpl::MaybeUpdateHistograms() {
   static const int kMinDiffDelayMs = 60;
 
@@ -1134,9 +1232,12 @@
   msg->set_sample_rate(api_format_.input_stream().sample_rate_hz());
   msg->set_num_input_channels(api_format_.input_stream().num_channels());
   msg->set_num_output_channels(api_format_.output_stream().num_channels());
-  msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels());
-  msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz());
+  msg->set_num_reverse_channels(
+      api_format_.reverse_input_stream().num_channels());
+  msg->set_reverse_sample_rate(
+      api_format_.reverse_input_stream().sample_rate_hz());
   msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz());
+  // TODO(ekmeyerson): Add reverse output fields to event_msg_.
 
   int err = WriteMessageToDebugFile();
   if (err != kNoError) {