APM: Replace most usages of AudioFrame with a stream interface

This CL introduces a new stream interface and uses it to replace
most usages of AudioFrame in the non-test code.

Some test code is updated as well, where the interface changes
required it.

Two more related CLs will follow.
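
For reference, a minimal usage sketch of the new interleaved int16_t
interface (illustrative only, not part of this CL; it mirrors the
updated aec_dump_integration_test.cc, assumes an already created
AudioProcessing instance, and the helper name is hypothetical):

  #include <array>

  #include "modules/audio_processing/include/audio_processing.h"

  void ProcessTenMsFrame(webrtc::AudioProcessing* apm) {
    constexpr int kSampleRateHz = 16000;
    constexpr size_t kNumChannels = 1;
    constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;  // 10 ms.
    std::array<int16_t, kSamplesPerChannel * kNumChannels> frame{};
    webrtc::StreamConfig config(kSampleRateHz, kNumChannels,
                                /*has_keyboard=*/false);

    // Render (far-end) side; |src| and |dest| may alias.
    apm->ProcessReverseStream(frame.data(), config, config, frame.data());

    // Capture (near-end) side. The VAD decision that previously ended up
    // in AudioFrame::vad_activity_ is now reported via VoiceDetectionResult.
    webrtc::AudioProcessing::VoiceDetectionResult vad_result =
        webrtc::AudioProcessing::VoiceDetectionResult::kNotAvailable;
    apm->ProcessStream(frame.data(), config, config, frame.data(),
                       &vad_result);
  }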

Bug: webrtc:5298
Change-Id: I5cfbe6079f30fc3fbf35b35fd077b6fb49c7def0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170040
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30799}
diff --git a/modules/audio_processing/aec_dump/BUILD.gn b/modules/audio_processing/aec_dump/BUILD.gn
index ad2c2c2..46f0022 100644
--- a/modules/audio_processing/aec_dump/BUILD.gn
+++ b/modules/audio_processing/aec_dump/BUILD.gn
@@ -35,7 +35,7 @@
 
 rtc_library("mock_aec_dump_unittests") {
   testonly = true
-
+  configs += [ "..:apm_debug_dump" ]
   sources = [ "aec_dump_integration_test.cc" ]
 
   deps = [
diff --git a/modules/audio_processing/aec_dump/aec_dump_impl.cc b/modules/audio_processing/aec_dump/aec_dump_impl.cc
index 37e9ec2..1299738 100644
--- a/modules/audio_processing/aec_dump/aec_dump_impl.cc
+++ b/modules/audio_processing/aec_dump/aec_dump_impl.cc
@@ -109,12 +109,16 @@
   capture_stream_info_.AddOutput(src);
 }
 
-void AecDumpImpl::AddCaptureStreamInput(const AudioFrame& frame) {
-  capture_stream_info_.AddInput(frame);
+void AecDumpImpl::AddCaptureStreamInput(const int16_t* const data,
+                                        int num_channels,
+                                        int samples_per_channel) {
+  capture_stream_info_.AddInput(data, num_channels, samples_per_channel);
 }
 
-void AecDumpImpl::AddCaptureStreamOutput(const AudioFrame& frame) {
-  capture_stream_info_.AddOutput(frame);
+void AecDumpImpl::AddCaptureStreamOutput(const int16_t* const data,
+                                         int num_channels,
+                                         int samples_per_channel) {
+  capture_stream_info_.AddOutput(data, num_channels, samples_per_channel);
 }
 
 void AecDumpImpl::AddAudioProcessingState(const AudioProcessingState& state) {
@@ -128,15 +132,16 @@
   capture_stream_info_.SetTask(CreateWriteToFileTask());
 }
 
-void AecDumpImpl::WriteRenderStreamMessage(const AudioFrame& frame) {
+void AecDumpImpl::WriteRenderStreamMessage(const int16_t* const data,
+                                           int num_channels,
+                                           int samples_per_channel) {
   auto task = CreateWriteToFileTask();
   auto* event = task->GetEvent();
 
   event->set_type(audioproc::Event::REVERSE_STREAM);
   audioproc::ReverseStream* msg = event->mutable_reverse_stream();
-  const size_t data_size =
-      sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  msg->set_data(frame.data(), data_size);
+  const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels;
+  msg->set_data(data, data_size);
 
   worker_queue_->PostTask(std::move(task));
 }
diff --git a/modules/audio_processing/aec_dump/aec_dump_impl.h b/modules/audio_processing/aec_dump/aec_dump_impl.h
index 4f7a63c..a9d3830 100644
--- a/modules/audio_processing/aec_dump/aec_dump_impl.h
+++ b/modules/audio_processing/aec_dump/aec_dump_impl.h
@@ -15,7 +15,6 @@
 #include <string>
 #include <vector>
 
-#include "api/audio/audio_frame.h"
 #include "modules/audio_processing/aec_dump/capture_stream_info.h"
 #include "modules/audio_processing/aec_dump/write_to_file_task.h"
 #include "modules/audio_processing/include/aec_dump.h"
@@ -55,12 +54,18 @@
                         int64_t time_now_ms) override;
   void AddCaptureStreamInput(const AudioFrameView<const float>& src) override;
   void AddCaptureStreamOutput(const AudioFrameView<const float>& src) override;
-  void AddCaptureStreamInput(const AudioFrame& frame) override;
-  void AddCaptureStreamOutput(const AudioFrame& frame) override;
+  void AddCaptureStreamInput(const int16_t* const data,
+                             int num_channels,
+                             int samples_per_channel) override;
+  void AddCaptureStreamOutput(const int16_t* const data,
+                              int num_channels,
+                              int samples_per_channel) override;
   void AddAudioProcessingState(const AudioProcessingState& state) override;
   void WriteCaptureStreamMessage() override;
 
-  void WriteRenderStreamMessage(const AudioFrame& frame) override;
+  void WriteRenderStreamMessage(const int16_t* const data,
+                                int num_channels,
+                                int samples_per_channel) override;
   void WriteRenderStreamMessage(
       const AudioFrameView<const float>& src) override;
 
diff --git a/modules/audio_processing/aec_dump/aec_dump_integration_test.cc b/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
index f3544b5..7b1f218 100644
--- a/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
+++ b/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
@@ -8,16 +8,17 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <array>
 #include <memory>
 #include <utility>
 
 #include "modules/audio_processing/aec_dump/mock_aec_dump.h"
+#include "modules/audio_processing/audio_processing_impl.h"
 #include "modules/audio_processing/include/audio_processing.h"
 
 using ::testing::_;
 using ::testing::AtLeast;
 using ::testing::Exactly;
-using ::testing::Matcher;
 using ::testing::StrictMock;
 
 namespace {
@@ -37,14 +38,6 @@
   return std::unique_ptr<webrtc::test::MockAecDump>(std::move(mock_aec_dump));
 }
 
-std::unique_ptr<webrtc::AudioFrame> CreateFakeFrame() {
-  auto fake_frame = std::make_unique<webrtc::AudioFrame>();
-  fake_frame->num_channels_ = 1;
-  fake_frame->sample_rate_hz_ = 48000;
-  fake_frame->samples_per_channel_ = 480;
-  return fake_frame;
-}
-
 }  // namespace
 
 TEST(AecDumpIntegration, ConfigurationAndInitShouldBeLogged) {
@@ -57,27 +50,40 @@
      RenderStreamShouldBeLoggedOnceEveryProcessReverseStream) {
   auto apm = CreateAudioProcessing();
   auto mock_aec_dump = CreateMockAecDump();
-  auto fake_frame = CreateFakeFrame();
+  constexpr int kNumChannels = 1;
+  constexpr int kNumSampleRateHz = 16000;
+  constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100;
+  std::array<int16_t, kNumSamplesPerChannel * kNumChannels> frame;
+  frame.fill(0);
+  webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels,
+                                     /*has_keyboard=*/false);
 
-  EXPECT_CALL(*mock_aec_dump.get(),
-              WriteRenderStreamMessage(Matcher<const webrtc::AudioFrame&>(_)))
+  EXPECT_CALL(*mock_aec_dump.get(), WriteRenderStreamMessage(_, _, _))
       .Times(Exactly(1));
 
   apm->AttachAecDump(std::move(mock_aec_dump));
-  apm->ProcessReverseStream(fake_frame.get());
+  apm->ProcessReverseStream(frame.data(), stream_config, stream_config,
+                            frame.data());
 }
 
 TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) {
   auto apm = CreateAudioProcessing();
   auto mock_aec_dump = CreateMockAecDump();
-  auto fake_frame = CreateFakeFrame();
+  constexpr int kNumChannels = 1;
+  constexpr int kNumSampleRateHz = 16000;
+  constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100;
+  std::array<int16_t, kNumSamplesPerChannel * kNumChannels> frame;
+  frame.fill(0);
+  webrtc::AudioProcessing::VoiceDetectionResult vad_result =
+      webrtc::AudioProcessing::VoiceDetectionResult::kNotAvailable;
 
-  EXPECT_CALL(*mock_aec_dump.get(),
-              AddCaptureStreamInput(Matcher<const webrtc::AudioFrame&>(_)))
+  webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels,
+                                     /*has_keyboard=*/false);
+
+  EXPECT_CALL(*mock_aec_dump.get(), AddCaptureStreamInput(_, _, _))
       .Times(AtLeast(1));
 
-  EXPECT_CALL(*mock_aec_dump.get(),
-              AddCaptureStreamOutput(Matcher<const webrtc::AudioFrame&>(_)))
+  EXPECT_CALL(*mock_aec_dump.get(), AddCaptureStreamOutput(_, _, _))
       .Times(Exactly(1));
 
   EXPECT_CALL(*mock_aec_dump.get(), AddAudioProcessingState(_))
@@ -87,5 +93,6 @@
       .Times(Exactly(1));
 
   apm->AttachAecDump(std::move(mock_aec_dump));
-  apm->ProcessStream(fake_frame.get());
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
+                     &vad_result);
 }
diff --git a/modules/audio_processing/aec_dump/aec_dump_unittest.cc b/modules/audio_processing/aec_dump/aec_dump_unittest.cc
index 3624bfc..f4b0908 100644
--- a/modules/audio_processing/aec_dump/aec_dump_unittest.cc
+++ b/modules/audio_processing/aec_dump/aec_dump_unittest.cc
@@ -8,6 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <array>
 #include <utility>
 
 #include "modules/audio_processing/aec_dump/aec_dump_factory.h"
@@ -27,11 +28,17 @@
     std::unique_ptr<webrtc::AecDump> aec_dump =
         webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue);
 
-    const webrtc::AudioFrame frame;
-    aec_dump->WriteRenderStreamMessage(frame);
+    constexpr int kNumChannels = 1;
+    constexpr int kNumSamplesPerChannel = 160;
+    std::array<int16_t, kNumSamplesPerChannel * kNumChannels> frame;
+    frame.fill(0);
+    aec_dump->WriteRenderStreamMessage(frame.data(), kNumChannels,
+                                       kNumSamplesPerChannel);
 
-    aec_dump->AddCaptureStreamInput(frame);
-    aec_dump->AddCaptureStreamOutput(frame);
+    aec_dump->AddCaptureStreamInput(frame.data(), kNumChannels,
+                                    kNumSamplesPerChannel);
+    aec_dump->AddCaptureStreamOutput(frame.data(), kNumChannels,
+                                     kNumSamplesPerChannel);
 
     aec_dump->WriteCaptureStreamMessage();
 
@@ -55,8 +62,14 @@
   {
     std::unique_ptr<webrtc::AecDump> aec_dump =
         webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue);
-    const webrtc::AudioFrame frame;
-    aec_dump->WriteRenderStreamMessage(frame);
+
+    constexpr int kNumChannels = 1;
+    constexpr int kNumSamplesPerChannel = 160;
+    std::array<int16_t, kNumSamplesPerChannel * kNumChannels> frame;
+    frame.fill(0);
+
+    aec_dump->WriteRenderStreamMessage(frame.data(), kNumChannels,
+                                       kNumSamplesPerChannel);
   }
 
   // Verify the file has been written after the AecDump d-tor has
diff --git a/modules/audio_processing/aec_dump/capture_stream_info.cc b/modules/audio_processing/aec_dump/capture_stream_info.cc
index dd48fd4..907cd97 100644
--- a/modules/audio_processing/aec_dump/capture_stream_info.cc
+++ b/modules/audio_processing/aec_dump/capture_stream_info.cc
@@ -41,20 +41,22 @@
   }
 }
 
-void CaptureStreamInfo::AddInput(const AudioFrame& frame) {
+void CaptureStreamInfo::AddInput(const int16_t* const data,
+                                 int num_channels,
+                                 int samples_per_channel) {
   RTC_DCHECK(task_);
   auto* stream = task_->GetEvent()->mutable_stream();
-  const size_t data_size =
-      sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  stream->set_input_data(frame.data(), data_size);
+  const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels;
+  stream->set_input_data(data, data_size);
 }
 
-void CaptureStreamInfo::AddOutput(const AudioFrame& frame) {
+void CaptureStreamInfo::AddOutput(const int16_t* const data,
+                                  int num_channels,
+                                  int samples_per_channel) {
   RTC_DCHECK(task_);
   auto* stream = task_->GetEvent()->mutable_stream();
-  const size_t data_size =
-      sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  stream->set_output_data(frame.data(), data_size);
+  const size_t data_size = sizeof(int16_t) * samples_per_channel * num_channels;
+  stream->set_output_data(data, data_size);
 }
 
 void CaptureStreamInfo::AddAudioProcessingState(
diff --git a/modules/audio_processing/aec_dump/capture_stream_info.h b/modules/audio_processing/aec_dump/capture_stream_info.h
index da8fb58..26b0e2e 100644
--- a/modules/audio_processing/aec_dump/capture_stream_info.h
+++ b/modules/audio_processing/aec_dump/capture_stream_info.h
@@ -15,7 +15,6 @@
 #include <utility>
 #include <vector>
 
-#include "api/audio/audio_frame.h"
 #include "modules/audio_processing/aec_dump/write_to_file_task.h"
 #include "modules/audio_processing/include/aec_dump.h"
 #include "rtc_base/checks.h"
@@ -40,8 +39,12 @@
   void AddInput(const AudioFrameView<const float>& src);
   void AddOutput(const AudioFrameView<const float>& src);
 
-  void AddInput(const AudioFrame& frame);
-  void AddOutput(const AudioFrame& frame);
+  void AddInput(const int16_t* const data,
+                int num_channels,
+                int samples_per_channel);
+  void AddOutput(const int16_t* const data,
+                 int num_channels,
+                 int samples_per_channel);
 
   void AddAudioProcessingState(const AecDump::AudioProcessingState& state);
 
diff --git a/modules/audio_processing/aec_dump/mock_aec_dump.h b/modules/audio_processing/aec_dump/mock_aec_dump.h
index 8910b42..65306a7 100644
--- a/modules/audio_processing/aec_dump/mock_aec_dump.h
+++ b/modules/audio_processing/aec_dump/mock_aec_dump.h
@@ -32,13 +32,22 @@
                void(const AudioFrameView<const float>& src));
   MOCK_METHOD1(AddCaptureStreamOutput,
                void(const AudioFrameView<const float>& src));
-  MOCK_METHOD1(AddCaptureStreamInput, void(const AudioFrame& frame));
-  MOCK_METHOD1(AddCaptureStreamOutput, void(const AudioFrame& frame));
+  MOCK_METHOD3(AddCaptureStreamInput,
+               void(const int16_t* const data,
+                    int num_channels,
+                    int samples_per_channel));
+  MOCK_METHOD3(AddCaptureStreamOutput,
+               void(const int16_t* const data,
+                    int num_channels,
+                    int samples_per_channel));
   MOCK_METHOD1(AddAudioProcessingState,
                void(const AudioProcessingState& state));
   MOCK_METHOD0(WriteCaptureStreamMessage, void());
 
-  MOCK_METHOD1(WriteRenderStreamMessage, void(const AudioFrame& frame));
+  MOCK_METHOD3(WriteRenderStreamMessage,
+               void(const int16_t* const data,
+                    int num_channels,
+                    int samples_per_channel));
   MOCK_METHOD1(WriteRenderStreamMessage,
                void(const AudioFrameView<const float>& src));
 
diff --git a/modules/audio_processing/agc/agc_manager_direct.h b/modules/audio_processing/agc/agc_manager_direct.h
index 54eb867..d3663be 100644
--- a/modules/audio_processing/agc/agc_manager_direct.h
+++ b/modules/audio_processing/agc/agc_manager_direct.h
@@ -22,7 +22,6 @@
 namespace webrtc {
 
 class MonoAgc;
-class AudioFrame;
 class GainControl;
 
 // Direct interface to use AGC to set volume and compression values.
diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc
index 9f79b54..ff6636d 100644
--- a/modules/audio_processing/audio_buffer.cc
+++ b/modules/audio_processing/audio_buffer.cc
@@ -111,7 +111,7 @@
   downmix_by_averaging_ = true;
 }
 
-void AudioBuffer::CopyFrom(const float* const* data,
+void AudioBuffer::CopyFrom(const float* const* stacked_data,
                            const StreamConfig& stream_config) {
   RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
   RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_);
@@ -127,15 +127,16 @@
     if (downmix_by_averaging_) {
       const float kOneByNumChannels = 1.f / input_num_channels_;
       for (size_t i = 0; i < input_num_frames_; ++i) {
-        float value = data[0][i];
+        float value = stacked_data[0][i];
         for (size_t j = 1; j < input_num_channels_; ++j) {
-          value += data[j][i];
+          value += stacked_data[j][i];
         }
         downmix[i] = value * kOneByNumChannels;
       }
     }
-    const float* downmixed_data =
-        downmix_by_averaging_ ? downmix.data() : data[channel_for_downmixing_];
+    const float* downmixed_data = downmix_by_averaging_
+                                      ? downmix.data()
+                                      : stacked_data[channel_for_downmixing_];
 
     if (resampling_needed) {
       input_resamplers_[0]->Resample(downmixed_data, input_num_frames_,
@@ -147,7 +148,7 @@
   } else {
     if (resampling_needed) {
       for (size_t i = 0; i < num_channels_; ++i) {
-        input_resamplers_[i]->Resample(data[i], input_num_frames_,
+        input_resamplers_[i]->Resample(stacked_data[i], input_num_frames_,
                                        data_->channels()[i],
                                        buffer_num_frames_);
         FloatToFloatS16(data_->channels()[i], buffer_num_frames_,
@@ -155,14 +156,15 @@
       }
     } else {
       for (size_t i = 0; i < num_channels_; ++i) {
-        FloatToFloatS16(data[i], buffer_num_frames_, data_->channels()[i]);
+        FloatToFloatS16(stacked_data[i], buffer_num_frames_,
+                        data_->channels()[i]);
       }
     }
   }
 }
 
 void AudioBuffer::CopyTo(const StreamConfig& stream_config,
-                         float* const* data) {
+                         float* const* stacked_data) {
   RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_);
 
   const bool resampling_needed = output_num_frames_ != buffer_num_frames_;
@@ -171,16 +173,18 @@
       FloatS16ToFloat(data_->channels()[i], buffer_num_frames_,
                       data_->channels()[i]);
       output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_,
-                                      data[i], output_num_frames_);
+                                      stacked_data[i], output_num_frames_);
     }
   } else {
     for (size_t i = 0; i < num_channels_; ++i) {
-      FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, data[i]);
+      FloatS16ToFloat(data_->channels()[i], buffer_num_frames_,
+                      stacked_data[i]);
     }
   }
 
   for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
-    memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
+    memcpy(stacked_data[i], stacked_data[0],
+           output_num_frames_ * sizeof(**stacked_data));
   }
 }
 
@@ -225,14 +229,15 @@
 }
 
 // The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
-void AudioBuffer::CopyFrom(const AudioFrame* frame) {
-  RTC_DCHECK_EQ(frame->num_channels_, input_num_channels_);
-  RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_);
+void AudioBuffer::CopyFrom(const int16_t* const interleaved_data,
+                           const StreamConfig& stream_config) {
+  RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_);
+  RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_);
   RestoreNumChannels();
 
   const bool resampling_required = input_num_frames_ != buffer_num_frames_;
 
-  const int16_t* interleaved = frame->data();
+  const int16_t* interleaved = interleaved_data;
   if (num_channels_ == 1) {
     if (input_num_channels_ == 1) {
       if (resampling_required) {
@@ -297,13 +302,16 @@
   }
 }
 
-void AudioBuffer::CopyTo(AudioFrame* frame) const {
-  RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1);
-  RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_);
+void AudioBuffer::CopyTo(const StreamConfig& stream_config,
+                         int16_t* const interleaved_data) {
+  const size_t config_num_channels = stream_config.num_channels();
+
+  RTC_DCHECK(config_num_channels == num_channels_ || num_channels_ == 1);
+  RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_);
 
   const bool resampling_required = buffer_num_frames_ != output_num_frames_;
 
-  int16_t* interleaved = frame->mutable_data();
+  int16_t* interleaved = interleaved_data;
   if (num_channels_ == 1) {
     std::array<float, kMaxSamplesPerChannel> float_buffer;
 
@@ -314,14 +322,14 @@
     const float* deinterleaved =
         resampling_required ? float_buffer.data() : data_->channels()[0];
 
-    if (frame->num_channels_ == 1) {
+    if (config_num_channels == 1) {
       for (size_t j = 0; j < output_num_frames_; ++j) {
         interleaved[j] = FloatS16ToS16(deinterleaved[j]);
       }
     } else {
       for (size_t i = 0, k = 0; i < output_num_frames_; ++i) {
         float tmp = FloatS16ToS16(deinterleaved[i]);
-        for (size_t j = 0; j < frame->num_channels_; ++j, ++k) {
+        for (size_t j = 0; j < config_num_channels; ++j, ++k) {
           interleaved[k] = tmp;
         }
       }
@@ -342,19 +350,19 @@
         output_resamplers_[i]->Resample(data_->channels()[i],
                                         buffer_num_frames_, float_buffer.data(),
                                         output_num_frames_);
-        interleave_channel(i, frame->num_channels_, output_num_frames_,
+        interleave_channel(i, config_num_channels, output_num_frames_,
                            float_buffer.data(), interleaved);
       }
     } else {
       for (size_t i = 0; i < num_channels_; ++i) {
-        interleave_channel(i, frame->num_channels_, output_num_frames_,
+        interleave_channel(i, config_num_channels, output_num_frames_,
                            data_->channels()[i], interleaved);
       }
     }
 
-    for (size_t i = num_channels_; i < frame->num_channels_; ++i) {
+    for (size_t i = num_channels_; i < config_num_channels; ++i) {
       for (size_t j = 0, k = i, n = num_channels_; j < output_num_frames_;
-           ++j, k += frame->num_channels_, n += frame->num_channels_) {
+           ++j, k += config_num_channels, n += config_num_channels) {
         interleaved[k] = interleaved[n];
       }
     }
diff --git a/modules/audio_processing/audio_buffer.h b/modules/audio_processing/audio_buffer.h
index 161c509..3eecf0d 100644
--- a/modules/audio_processing/audio_buffer.h
+++ b/modules/audio_processing/audio_buffer.h
@@ -17,7 +17,6 @@
 #include <memory>
 #include <vector>
 
-#include "api/audio/audio_frame.h"
 #include "common_audio/channel_buffer.h"
 #include "modules/audio_processing/include/audio_processing.h"
 
@@ -109,12 +108,15 @@
   }
 
   // Copies data into the buffer.
-  void CopyFrom(const AudioFrame* frame);
-  void CopyFrom(const float* const* data, const StreamConfig& stream_config);
+  void CopyFrom(const int16_t* const interleaved_data,
+                const StreamConfig& stream_config);
+  void CopyFrom(const float* const* stacked_data,
+                const StreamConfig& stream_config);
 
   // Copies data from the buffer.
-  void CopyTo(AudioFrame* frame) const;
-  void CopyTo(const StreamConfig& stream_config, float* const* data);
+  void CopyTo(const StreamConfig& stream_config,
+              int16_t* const interleaved_data);
+  void CopyTo(const StreamConfig& stream_config, float* const* stacked_data);
   void CopyTo(AudioBuffer* buffer) const;
 
   // Splits the buffer data into frequency bands.
@@ -145,8 +147,6 @@
   const float* const* split_channels_const_f(Band band) const {
     return split_channels_const(band);
   }
-  void DeinterleaveFrom(const AudioFrame* frame) { CopyFrom(frame); }
-  void InterleaveTo(AudioFrame* frame) const { CopyTo(frame); }
 
  private:
   FRIEND_TEST_ALL_PREFIXES(AudioBufferTest,
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index f4c242b..dfa5437 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -19,6 +19,7 @@
 
 #include "absl/types/optional.h"
 #include "api/array_view.h"
+#include "api/audio/audio_frame.h"
 #include "common_audio/audio_converter.h"
 #include "common_audio/include/audio_util.h"
 #include "modules/audio_processing/agc2/gain_applier.h"
@@ -1064,35 +1065,60 @@
   StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
                              /*has_keyboard=*/false);
   RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());
+
+  VoiceDetectionResult vad_result = VoiceDetectionResult::kNotAvailable;
+
+  int result = ProcessStream(frame->data(), input_config, output_config,
+                             frame->mutable_data(), &vad_result);
+
+  if (vad_result != VoiceDetectionResult::kNotAvailable) {
+    frame->vad_activity_ = vad_result == VoiceDetectionResult::kDetected
+                               ? AudioFrame::VADActivity::kVadActive
+                               : AudioFrame::VADActivity::kVadPassive;
+  }
+
+  return result;
+}
+
+int AudioProcessingImpl::ProcessStream(const int16_t* const src,
+                                       const StreamConfig& input_config,
+                                       const StreamConfig& output_config,
+                                       int16_t* const dest,
+                                       VoiceDetectionResult* vad_result) {
   RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
 
   rtc::CritScope cs_capture(&crit_capture_);
 
   if (aec_dump_) {
-    RecordUnprocessedCaptureStream(*frame);
+    RecordUnprocessedCaptureStream(src, input_config);
   }
 
-  capture_.capture_audio->CopyFrom(frame);
+  capture_.capture_audio->CopyFrom(src, input_config);
   if (capture_.capture_fullband_audio) {
-    capture_.capture_fullband_audio->CopyFrom(frame);
+    capture_.capture_fullband_audio->CopyFrom(src, input_config);
   }
   RETURN_ON_ERR(ProcessCaptureStreamLocked());
   if (submodule_states_.CaptureMultiBandProcessingPresent() ||
       submodule_states_.CaptureFullBandProcessingActive()) {
     if (capture_.capture_fullband_audio) {
-      capture_.capture_fullband_audio->CopyTo(frame);
+      capture_.capture_fullband_audio->CopyTo(output_config, dest);
     } else {
-      capture_.capture_audio->CopyTo(frame);
+      capture_.capture_audio->CopyTo(output_config, dest);
     }
   }
-  if (capture_.stats.voice_detected) {
-    frame->vad_activity_ = *capture_.stats.voice_detected
-                               ? AudioFrame::kVadActive
-                               : AudioFrame::kVadPassive;
+
+  if (vad_result) {
+    if (capture_.stats.voice_detected) {
+      *vad_result = *capture_.stats.voice_detected
+                        ? VoiceDetectionResult::kDetected
+                        : VoiceDetectionResult::kNotDetected;
+    } else {
+      *vad_result = VoiceDetectionResult::kNotAvailable;
+    }
   }
 
   if (aec_dump_) {
-    RecordProcessedCaptureStream(*frame);
+    RecordProcessedCaptureStream(dest, output_config);
   }
 
   return kNoError;
@@ -1430,7 +1456,6 @@
 
 int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
-  rtc::CritScope cs(&crit_render_);
   if (frame == nullptr) {
     return kNullPointerError;
   }
@@ -1446,31 +1471,47 @@
     return kBadNumberChannelsError;
   }
 
+  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_,
+                            /*has_keyboard=*/false);
+  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
+                             /*has_keyboard=*/false);
+
+  int result = ProcessReverseStream(frame->data(), input_config, output_config,
+                                    frame->mutable_data());
+  return result;
+}
+
+int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
+                                              const StreamConfig& input_config,
+                                              const StreamConfig& output_config,
+                                              int16_t* const dest) {
+  rtc::CritScope cs(&crit_render_);
   ProcessingConfig processing_config = formats_.api_format;
   processing_config.reverse_input_stream().set_sample_rate_hz(
-      frame->sample_rate_hz_);
+      input_config.sample_rate_hz());
   processing_config.reverse_input_stream().set_num_channels(
-      frame->num_channels_);
+      input_config.num_channels());
   processing_config.reverse_output_stream().set_sample_rate_hz(
-      frame->sample_rate_hz_);
+      output_config.sample_rate_hz());
   processing_config.reverse_output_stream().set_num_channels(
-      frame->num_channels_);
+      output_config.num_channels());
 
   RETURN_ON_ERR(MaybeInitializeRender(processing_config));
-  if (frame->samples_per_channel_ !=
+  if (input_config.num_frames() !=
       formats_.api_format.reverse_input_stream().num_frames()) {
     return kBadDataLengthError;
   }
 
   if (aec_dump_) {
-    aec_dump_->WriteRenderStreamMessage(*frame);
+    aec_dump_->WriteRenderStreamMessage(src, input_config.num_channels(),
+                                        input_config.num_frames());
   }
 
-  render_.render_audio->CopyFrom(frame);
+  render_.render_audio->CopyFrom(src, input_config);
   RETURN_ON_ERR(ProcessRenderStreamLocked());
   if (submodule_states_.RenderMultiBandProcessingActive() ||
       submodule_states_.RenderFullBandProcessingActive()) {
-    render_.render_audio->CopyTo(frame);
+    render_.render_audio->CopyTo(output_config, dest);
   }
   return kNoError;
 }
@@ -2007,11 +2048,13 @@
 }
 
 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
-    const AudioFrame& capture_frame) {
+    const int16_t* const data,
+    const StreamConfig& config) {
   RTC_DCHECK(aec_dump_);
   WriteAecDumpConfigMessage(false);
 
-  aec_dump_->AddCaptureStreamInput(capture_frame);
+  aec_dump_->AddCaptureStreamInput(data, config.num_channels(),
+                                   config.num_frames());
   RecordAudioProcessingState();
 }
 
@@ -2028,10 +2071,12 @@
 }
 
 void AudioProcessingImpl::RecordProcessedCaptureStream(
-    const AudioFrame& processed_capture_frame) {
+    const int16_t* const data,
+    const StreamConfig& config) {
   RTC_DCHECK(aec_dump_);
 
-  aec_dump_->AddCaptureStreamOutput(processed_capture_frame);
+  aec_dump_->AddCaptureStreamOutput(data, config.num_channels(),
+                                    config.num_frames());
   aec_dump_->WriteCaptureStreamMessage();
 }
 
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index fd86f39..23ae28f 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -43,6 +43,7 @@
 namespace webrtc {
 
 class ApmDataDumper;
+class AudioFrame;
 class AudioConverter;
 
 class AudioProcessingImpl : public AudioProcessing {
@@ -80,6 +81,11 @@
   // Capture-side exclusive methods possibly running APM in a
   // multi-threaded manner. Acquire the capture lock.
   int ProcessStream(AudioFrame* frame) override;
+  int ProcessStream(const int16_t* const src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    int16_t* const dest,
+                    VoiceDetectionResult* vad_result) override;
   int ProcessStream(const float* const* src,
                     const StreamConfig& input_config,
                     const StreamConfig& output_config,
@@ -95,6 +101,10 @@
   // Render-side exclusive methods possibly running APM in a
   // multi-threaded manner. Acquire the render lock.
   int ProcessReverseStream(AudioFrame* frame) override;
+  int ProcessReverseStream(const int16_t* const src,
+                           const StreamConfig& input_config,
+                           const StreamConfig& output_config,
+                           int16_t* const dest) override;
   int AnalyzeReverseStream(const float* const* data,
                            const StreamConfig& reverse_config) override;
   int ProcessReverseStream(const float* const* src,
@@ -292,7 +302,8 @@
   void RecordUnprocessedCaptureStream(const float* const* capture_stream)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
 
-  void RecordUnprocessedCaptureStream(const AudioFrame& capture_frame)
+  void RecordUnprocessedCaptureStream(const int16_t* const data,
+                                      const StreamConfig& config)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
 
   // Notifies attached AecDump of current configuration and
@@ -302,7 +313,8 @@
       const float* const* processed_capture_stream)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
 
-  void RecordProcessedCaptureStream(const AudioFrame& processed_capture_frame)
+  void RecordProcessedCaptureStream(const int16_t* const data,
+                                    const StreamConfig& config)
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
 
   // Notifies attached AecDump about current state (delay, drift, etc).
diff --git a/modules/audio_processing/include/aec_dump.h b/modules/audio_processing/include/aec_dump.h
index b64bf0b..ed5acb0 100644
--- a/modules/audio_processing/include/aec_dump.h
+++ b/modules/audio_processing/include/aec_dump.h
@@ -15,7 +15,6 @@
 
 #include <string>
 
-#include "api/audio/audio_frame.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
 #include "modules/audio_processing/include/audio_processing.h"
 #include "rtc_base/deprecation.h"
@@ -88,13 +87,19 @@
       const AudioFrameView<const float>& src) = 0;
   virtual void AddCaptureStreamOutput(
       const AudioFrameView<const float>& src) = 0;
-  virtual void AddCaptureStreamInput(const AudioFrame& frame) = 0;
-  virtual void AddCaptureStreamOutput(const AudioFrame& frame) = 0;
+  virtual void AddCaptureStreamInput(const int16_t* const data,
+                                     int num_channels,
+                                     int samples_per_channel) = 0;
+  virtual void AddCaptureStreamOutput(const int16_t* const data,
+                                      int num_channels,
+                                      int samples_per_channel) = 0;
   virtual void AddAudioProcessingState(const AudioProcessingState& state) = 0;
   virtual void WriteCaptureStreamMessage() = 0;
 
   // Logs Event::Type REVERSE_STREAM message.
-  virtual void WriteRenderStreamMessage(const AudioFrame& frame) = 0;
+  virtual void WriteRenderStreamMessage(const int16_t* const data,
+                                        int num_channels,
+                                        int samples_per_channel) = 0;
   virtual void WriteRenderStreamMessage(
       const AudioFrameView<const float>& src) = 0;
 
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index b63fa70..6f85aa9 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -372,6 +372,8 @@
     kStereoAndKeyboard
   };
 
+  enum class VoiceDetectionResult { kNotAvailable, kDetected, kNotDetected };
+
   // Specifies the properties of a setting to be passed to AudioProcessing at
   // runtime.
   class RuntimeSetting {
@@ -538,6 +540,15 @@
   // method, it will trigger an initialization.
   virtual int ProcessStream(AudioFrame* frame) = 0;
 
+  // Accepts and produces a 10 ms frame of interleaved 16 bit integer audio as
+  // specified in |input_config| and |output_config|. |src| and |dest| may use
+  // the same memory, if desired.
+  virtual int ProcessStream(const int16_t* const src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            int16_t* const dest,
+                            VoiceDetectionResult* vad_result) = 0;
+
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
   // |src| points to a channel buffer, arranged according to |input_stream|. At
   // output, the channels will be arranged according to |output_stream| in
@@ -564,6 +575,14 @@
   // members of |frame| must be valid.
   virtual int ProcessReverseStream(AudioFrame* frame) = 0;
 
+  // Accepts and produces a 10 ms frame of interleaved 16 bit integer audio for
+  // the reverse direction audio stream as specified in |input_config| and
+  // |output_config|. |src| and |dest| may use the same memory, if desired.
+  virtual int ProcessReverseStream(const int16_t* const src,
+                                   const StreamConfig& input_config,
+                                   const StreamConfig& output_config,
+                                   int16_t* const dest) = 0;
+
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
   // |data| points to a channel buffer, arranged according to |reverse_config|.
   virtual int ProcessReverseStream(const float* const* src,
diff --git a/modules/audio_processing/include/mock_audio_processing.h b/modules/audio_processing/include/mock_audio_processing.h
index b36013a..518087a 100644
--- a/modules/audio_processing/include/mock_audio_processing.h
+++ b/modules/audio_processing/include/mock_audio_processing.h
@@ -82,6 +82,12 @@
   MOCK_METHOD1(set_output_will_be_muted, void(bool muted));
   MOCK_METHOD1(SetRuntimeSetting, void(RuntimeSetting setting));
   MOCK_METHOD1(ProcessStream, int(AudioFrame* frame));
+  MOCK_METHOD5(ProcessStream,
+               int(const int16_t* const src,
+                   const StreamConfig& input_config,
+                   const StreamConfig& output_config,
+                   int16_t* const dest,
+                   VoiceDetectionResult* const vad_result));
   MOCK_METHOD7(ProcessStream,
                int(const float* const* src,
                    size_t samples_per_channel,
@@ -96,6 +102,11 @@
                    const StreamConfig& output_config,
                    float* const* dest));
   MOCK_METHOD1(ProcessReverseStream, int(AudioFrame* frame));
+  MOCK_METHOD4(ProcessReverseStream,
+               int(const int16_t* const src,
+                   const StreamConfig& input_config,
+                   const StreamConfig& output_config,
+                   int16_t* const dest));
   MOCK_METHOD4(AnalyzeReverseStream,
                int(const float* const* data,
                    size_t samples_per_channel,