Fix the maximum native sample rate in AudioProcessing

BUG=webrtc:4983
R=andrew@webrtc.org, henrik.lundin@webrtc.org

Review URL: https://codereview.webrtc.org/1338833002 .

Cr-Commit-Position: refs/heads/master@{#10037}
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
index da9d1fa..82480d2 100644
--- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
+++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
@@ -290,8 +290,9 @@
 
         // We only use the limiter if it supports the output sample rate and
         // we're actually mixing multiple streams.
-        use_limiter_ = _numMixedParticipants > 1 &&
-                       _outputFrequency <= kAudioProcMaxNativeSampleRateHz;
+        use_limiter_ =
+            _numMixedParticipants > 1 &&
+            _outputFrequency <= AudioProcessing::kMaxNativeSampleRateHz;
 
         MixFromList(mixedAudio, mixList);
         MixAnonomouslyFromList(mixedAudio, additionalFramesList);
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index ff4128b..4ef4e6d 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -147,6 +147,17 @@
   int volume_;
 };
 
+const int AudioProcessing::kNativeSampleRatesHz[] = {
+    AudioProcessing::kSampleRate8kHz,
+    AudioProcessing::kSampleRate16kHz,
+    AudioProcessing::kSampleRate32kHz,
+    AudioProcessing::kSampleRate48kHz};
+const size_t AudioProcessing::kNumNativeSampleRates =
+    arraysize(AudioProcessing::kNativeSampleRatesHz);
+const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
+    kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
+const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;
+
 AudioProcessing* AudioProcessing::Create() {
   Config config;
   return Create(config, nullptr);
@@ -400,18 +411,16 @@
       std::min(api_format_.input_stream().sample_rate_hz(),
                api_format_.output_stream().sample_rate_hz());
   int fwd_proc_rate;
-  if (min_proc_rate > kSampleRate32kHz) {
-    fwd_proc_rate = kSampleRate48kHz;
-  } else if (min_proc_rate > kSampleRate16kHz) {
-    fwd_proc_rate = kSampleRate32kHz;
-  } else if (min_proc_rate > kSampleRate8kHz) {
-    fwd_proc_rate = kSampleRate16kHz;
-  } else {
-    fwd_proc_rate = kSampleRate8kHz;
+  for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
+    fwd_proc_rate = kNativeSampleRatesHz[i];
+    if (fwd_proc_rate >= min_proc_rate) {
+      break;
+    }
   }
   // ...with one exception.
-  if (echo_control_mobile_->is_enabled() && min_proc_rate > kSampleRate16kHz) {
-    fwd_proc_rate = kSampleRate16kHz;
+  if (echo_control_mobile_->is_enabled() &&
+      min_proc_rate > kMaxAECMSampleRateHz) {
+    fwd_proc_rate = kMaxAECMSampleRateHz;
   }
 
   fwd_proc_format_ = StreamConfig(fwd_proc_rate);
@@ -592,7 +601,7 @@
     return kBadSampleRateError;
   }
   if (echo_control_mobile_->is_enabled() &&
-      frame->sample_rate_hz_ > kSampleRate16kHz) {
+      frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
     LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
     return kUnsupportedComponentError;
   }
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 445d5c8..5eb3b62 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -15,6 +15,7 @@
 #include <stdio.h>  // FILE
 #include <vector>
 
+#include "webrtc/base/arraysize.h"
 #include "webrtc/base/platform_file.h"
 #include "webrtc/common.h"
 #include "webrtc/modules/audio_processing/beamformer/array_util.h"
@@ -128,8 +129,6 @@
   bool enabled;
 };
 
-static const int kAudioProcMaxNativeSampleRateHz = 32000;
-
 // The Audio Processing Module (APM) provides a collection of voice processing
 // components designed for real-time communications software.
 //
@@ -471,6 +470,11 @@
     kSampleRate48kHz = 48000
   };
 
+  static const int kNativeSampleRatesHz[];
+  static const size_t kNumNativeSampleRates;
+  static const int kMaxNativeSampleRateHz;
+  static const int kMaxAECMSampleRateHz;
+
   static const int kChunkSizeMs = 10;
 };
 
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 4ac4b89..fa44785 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -3190,19 +3190,12 @@
   CodecInst codec;
   GetSendCodec(codec);
 
-  if (!mono_recording_audio_.get()) {
-    // Temporary space for DownConvertToCodecFormat.
-    mono_recording_audio_.reset(new int16_t[kMaxMonoDataSizeSamples]);
-  }
-  DownConvertToCodecFormat(audio_data,
-                           number_of_frames,
-                           number_of_channels,
-                           sample_rate,
-                           codec.channels,
-                           codec.plfreq,
-                           mono_recording_audio_.get(),
-                           &input_resampler_,
-                           &_audioFrame);
+  // Never upsample or upmix the capture signal here. This should be done at the
+  // end of the send chain.
+  _audioFrame.sample_rate_hz_ = std::min(codec.plfreq, sample_rate);
+  _audioFrame.num_channels_ = std::min(number_of_channels, codec.channels);
+  RemixAndResample(audio_data, number_of_frames, number_of_channels,
+                   sample_rate, &input_resampler_, &_audioFrame);
 }
 
 uint32_t
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index d9e4575..bf792a5 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -499,7 +499,6 @@
     AudioLevel _outputAudioLevel;
     bool _externalTransport;
     AudioFrame _audioFrame;
-    rtc::scoped_ptr<int16_t[]> mono_recording_audio_;
     // Downsamples to the codec rate if necessary.
     PushResampler<int16_t> input_resampler_;
     FilePlayer* _inputFilePlayerPtr;
diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc
index 92b9c6e..5e62939 100644
--- a/webrtc/voice_engine/transmit_mixer.cc
+++ b/webrtc/voice_engine/transmit_mixer.cc
@@ -1133,31 +1133,25 @@
   int codec_rate;
   int num_codec_channels;
   GetSendCodecInfo(&codec_rate, &num_codec_channels);
-  // TODO(ajm): This currently restricts the sample rate to 32 kHz.
-  // See: https://code.google.com/p/webrtc/issues/detail?id=3146
-  // When 48 kHz is supported natively by AudioProcessing, this will have
-  // to be changed to handle 44.1 kHz.
-  int max_sample_rate_hz = kAudioProcMaxNativeSampleRateHz;
-  if (audioproc_->echo_control_mobile()->is_enabled()) {
-    // AECM only supports 8 and 16 kHz.
-    max_sample_rate_hz = 16000;
-  }
-  codec_rate = std::min(codec_rate, max_sample_rate_hz);
   stereo_codec_ = num_codec_channels == 2;
 
-  if (!mono_buffer_.get()) {
-    // Temporary space for DownConvertToCodecFormat.
-    mono_buffer_.reset(new int16_t[kMaxMonoDataSizeSamples]);
+  // We want to process at the lowest rate possible without losing information.
+  // Choose the lowest native rate >= the minimum of the input and codec rates.
+  const int min_processing_rate = std::min(sample_rate_hz, codec_rate);
+  for (size_t i = 0; i < AudioProcessing::kNumNativeSampleRates; ++i) {
+    _audioFrame.sample_rate_hz_ = AudioProcessing::kNativeSampleRatesHz[i];
+    if (_audioFrame.sample_rate_hz_ >= min_processing_rate) {
+      break;
+    }
   }
-  DownConvertToCodecFormat(audio,
-                           samples_per_channel,
-                           num_channels,
-                           sample_rate_hz,
-                           num_codec_channels,
-                           codec_rate,
-                           mono_buffer_.get(),
-                           &resampler_,
-                           &_audioFrame);
+  if (audioproc_->echo_control_mobile()->is_enabled()) {
+    // AECM only supports 8 and 16 kHz.
+    _audioFrame.sample_rate_hz_ = std::min(
+        _audioFrame.sample_rate_hz_, AudioProcessing::kMaxAECMSampleRateHz);
+  }
+  _audioFrame.num_channels_ = std::min(num_channels, num_codec_channels);
+  RemixAndResample(audio, samples_per_channel, num_channels, sample_rate_hz,
+                   &resampler_, &_audioFrame);
 }
 
 int32_t TransmitMixer::RecordAudioToFile(
diff --git a/webrtc/voice_engine/transmit_mixer.h b/webrtc/voice_engine/transmit_mixer.h
index 8bbb421..714efb4 100644
--- a/webrtc/voice_engine/transmit_mixer.h
+++ b/webrtc/voice_engine/transmit_mixer.h
@@ -229,7 +229,6 @@
     int32_t _remainingMuteMicTimeMs;
     bool stereo_codec_;
     bool swap_stereo_channels_;
-    rtc::scoped_ptr<int16_t[]> mono_buffer_;
 };
 
 }  // namespace voe
diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc
index 82ef076..498620a 100644
--- a/webrtc/voice_engine/utility.cc
+++ b/webrtc/voice_engine/utility.cc
@@ -21,34 +21,43 @@
 namespace webrtc {
 namespace voe {
 
-// TODO(ajm): There is significant overlap between RemixAndResample and
-// ConvertToCodecFormat. Consolidate using AudioConverter.
 void RemixAndResample(const AudioFrame& src_frame,
                       PushResampler<int16_t>* resampler,
                       AudioFrame* dst_frame) {
-  const int16_t* audio_ptr = src_frame.data_;
-  int audio_ptr_num_channels = src_frame.num_channels_;
+  RemixAndResample(src_frame.data_, src_frame.samples_per_channel_,
+                   src_frame.num_channels_, src_frame.sample_rate_hz_,
+                   resampler, dst_frame);
+  dst_frame->timestamp_ = src_frame.timestamp_;
+  dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_;
+  dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_;
+}
+
+void RemixAndResample(const int16_t* src_data,
+                      size_t samples_per_channel,
+                      int num_channels,
+                      int sample_rate_hz,
+                      PushResampler<int16_t>* resampler,
+                      AudioFrame* dst_frame) {
+  const int16_t* audio_ptr = src_data;
+  int audio_ptr_num_channels = num_channels;
   int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
 
   // Downmix before resampling.
-  if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
-    AudioFrameOperations::StereoToMono(src_frame.data_,
-                                       src_frame.samples_per_channel_,
+  if (num_channels == 2 && dst_frame->num_channels_ == 1) {
+    AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
                                        mono_audio);
     audio_ptr = mono_audio;
     audio_ptr_num_channels = 1;
   }
 
-  if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
-                                    dst_frame->sample_rate_hz_,
+  if (resampler->InitializeIfNeeded(sample_rate_hz, dst_frame->sample_rate_hz_,
                                     audio_ptr_num_channels) == -1) {
-    LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
+    LOG_FERR3(LS_ERROR, InitializeIfNeeded, sample_rate_hz,
               dst_frame->sample_rate_hz_, audio_ptr_num_channels);
     assert(false);
   }
 
-  const size_t src_length = src_frame.samples_per_channel_ *
-                         audio_ptr_num_channels;
+  const size_t src_length = samples_per_channel * audio_ptr_num_channels;
   int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
                                        AudioFrame::kMaxDataSizeSamples);
   if (out_length == -1) {
@@ -59,66 +68,12 @@
       static_cast<size_t>(out_length / audio_ptr_num_channels);
 
   // Upmix after resampling.
-  if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
+  if (num_channels == 1 && dst_frame->num_channels_ == 2) {
     // The audio in dst_frame really is mono at this point; MonoToStereo will
     // set this back to stereo.
     dst_frame->num_channels_ = 1;
     AudioFrameOperations::MonoToStereo(dst_frame);
   }
-
-  dst_frame->timestamp_ = src_frame.timestamp_;
-  dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_;
-  dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_;
-}
-
-void DownConvertToCodecFormat(const int16_t* src_data,
-                              size_t samples_per_channel,
-                              int num_channels,
-                              int sample_rate_hz,
-                              int codec_num_channels,
-                              int codec_rate_hz,
-                              int16_t* mono_buffer,
-                              PushResampler<int16_t>* resampler,
-                              AudioFrame* dst_af) {
-  assert(samples_per_channel <= kMaxMonoDataSizeSamples);
-  assert(num_channels == 1 || num_channels == 2);
-  assert(codec_num_channels == 1 || codec_num_channels == 2);
-  dst_af->Reset();
-
-  // Never upsample the capture signal here. This should be done at the
-  // end of the send chain.
-  int destination_rate = std::min(codec_rate_hz, sample_rate_hz);
-
-  // If no stereo codecs are in use, we downmix a stereo stream from the
-  // device early in the chain, before resampling.
-  if (num_channels == 2 && codec_num_channels == 1) {
-    AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
-                                       mono_buffer);
-    src_data = mono_buffer;
-    num_channels = 1;
-  }
-
-  if (resampler->InitializeIfNeeded(
-          sample_rate_hz, destination_rate, num_channels) != 0) {
-    LOG_FERR3(LS_ERROR,
-              InitializeIfNeeded,
-              sample_rate_hz,
-              destination_rate,
-              num_channels);
-    assert(false);
-  }
-
-  const size_t in_length = samples_per_channel * num_channels;
-  int out_length = resampler->Resample(
-      src_data, in_length, dst_af->data_, AudioFrame::kMaxDataSizeSamples);
-  if (out_length == -1) {
-    LOG_FERR3(LS_ERROR, Resample, src_data, in_length, dst_af->data_);
-    assert(false);
-  }
-
-  dst_af->samples_per_channel_ = static_cast<size_t>(out_length / num_channels);
-  dst_af->sample_rate_hz_ = destination_rate;
-  dst_af->num_channels_ = num_channels;
 }
 
 void MixWithSat(int16_t target[],
diff --git a/webrtc/voice_engine/utility.h b/webrtc/voice_engine/utility.h
index 87003c4..cc44533 100644
--- a/webrtc/voice_engine/utility.h
+++ b/webrtc/voice_engine/utility.h
@@ -24,32 +24,26 @@
 
 namespace voe {
 
-// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
-// Expects |dst_frame| to have its sample rate and channels members set to the
-// desired values. Updates the samples per channel member accordingly. No other
-// members will be changed.
+// Upmix or downmix and resample the audio to |dst_frame|. Expects |dst_frame|
+// to have its sample rate and channels members set to the desired values.
+// Updates the |samples_per_channel_| member accordingly.
+//
+// This version has an AudioFrame |src_frame| as input and sets the output
+// |timestamp_|, |elapsed_time_ms_| and |ntp_time_ms_| members equal to the
+// input ones.
 void RemixAndResample(const AudioFrame& src_frame,
                       PushResampler<int16_t>* resampler,
                       AudioFrame* dst_frame);
 
-// Downmix and downsample the audio in |src_data| to |dst_af| as necessary,
-// specified by |codec_num_channels| and |codec_rate_hz|. |mono_buffer| is
-// temporary space and must be of sufficient size to hold the downmixed source
-// audio (recommend using a size of kMaxMonoDataSizeSamples).
-//
-// |dst_af| will have its data and format members (sample rate, channels and
-// samples per channel) set appropriately. No other members will be changed.
-// TODO(ajm): For now, this still calls Reset() on |dst_af|. Remove this, as
-// it shouldn't be needed.
-void DownConvertToCodecFormat(const int16_t* src_data,
-                              size_t samples_per_channel,
-                              int num_channels,
-                              int sample_rate_hz,
-                              int codec_num_channels,
-                              int codec_rate_hz,
-                              int16_t* mono_buffer,
-                              PushResampler<int16_t>* resampler,
-                              AudioFrame* dst_af);
+// This version has a pointer to the samples |src_data| as input and receives
+// |samples_per_channel|, |num_channels| and |sample_rate_hz| of the data as
+// parameters.
+void RemixAndResample(const int16_t* src_data,
+                      size_t samples_per_channel,
+                      int num_channels,
+                      int sample_rate_hz,
+                      PushResampler<int16_t>* resampler,
+                      AudioFrame* dst_frame);
 
 void MixWithSat(int16_t target[],
                 int target_channel,
diff --git a/webrtc/voice_engine/utility_unittest.cc b/webrtc/voice_engine/utility_unittest.cc
index 5f02f51..226e383 100644
--- a/webrtc/voice_engine/utility_unittest.cc
+++ b/webrtc/voice_engine/utility_unittest.cc
@@ -21,11 +21,6 @@
 namespace voe {
 namespace {
 
-enum FunctionToTest {
-  TestRemixAndResample,
-  TestDownConvertToCodecFormat
-};
-
 class UtilityTest : public ::testing::Test {
  protected:
   UtilityTest() {
@@ -36,9 +31,10 @@
     golden_frame_.CopyFrom(src_frame_);
   }
 
-  void RunResampleTest(int src_channels, int src_sample_rate_hz,
-                       int dst_channels, int dst_sample_rate_hz,
-                       FunctionToTest function);
+  void RunResampleTest(int src_channels,
+                       int src_sample_rate_hz,
+                       int dst_channels,
+                       int dst_sample_rate_hz);
 
   PushResampler<int16_t> resampler_;
   AudioFrame src_frame_;
@@ -130,8 +126,7 @@
 void UtilityTest::RunResampleTest(int src_channels,
                                   int src_sample_rate_hz,
                                   int dst_channels,
-                                  int dst_sample_rate_hz,
-                                  FunctionToTest function) {
+                                  int dst_sample_rate_hz) {
   PushResampler<int16_t> resampler;  // Create a new one with every test.
   const int16_t kSrcLeft = 30;  // Shouldn't overflow for any used sample rate.
   const int16_t kSrcRight = 15;
@@ -168,20 +163,7 @@
       kInputKernelDelaySamples * dst_channels * 2);
   printf("(%d, %d Hz) -> (%d, %d Hz) ",  // SNR reported on the same line later.
       src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
-  if (function == TestRemixAndResample) {
-    RemixAndResample(src_frame_, &resampler, &dst_frame_);
-  } else {
-    int16_t mono_buffer[kMaxMonoDataSizeSamples];
-    DownConvertToCodecFormat(src_frame_.data_,
-                             src_frame_.samples_per_channel_,
-                             src_frame_.num_channels_,
-                             src_frame_.sample_rate_hz_,
-                             dst_frame_.num_channels_,
-                             dst_frame_.sample_rate_hz_,
-                             mono_buffer,
-                             &resampler,
-                             &dst_frame_);
-  }
+  RemixAndResample(src_frame_, &resampler, &dst_frame_);
 
   if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) {
     // The sinc resampler gives poor SNR at this extreme conversion, but we
@@ -232,28 +214,7 @@
       for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
         for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
           RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
-                          kChannels[dst_channel], kSampleRates[dst_rate],
-                          TestRemixAndResample);
-        }
-      }
-    }
-  }
-}
-
-TEST_F(UtilityTest, ConvertToCodecFormatSucceeds) {
-  const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
-  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
-  const int kChannels[] = {1, 2};
-  const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
-  for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
-    for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
-      for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
-        for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
-          if (dst_rate <= src_rate && dst_channel <= src_channel) {
-            RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
-                            kChannels[src_channel], kSampleRates[dst_rate],
-                            TestDownConvertToCodecFormat);
-          }
+                          kChannels[dst_channel], kSampleRates[dst_rate]);
         }
       }
     }