Merge to M71: AEC3: Simplify render buffering

This CL simplifies the buffering of render data. Instead of making assumptions
about the worst possible platform, it leverages recent improvements in
the delay estimator to quickly adapt when the conditions change.

Pros:
- No capture delay, delay is found ~200 ms faster.
- Cleaner code that makes the concept of delay more clear.
- Allows for removal of one matched filter because of the jitter headroom
removal.

Cons:
- Delay estimator needs to re-adapt when the call jitter increases.

The code can be deactivated by a kill switch. When the kill switch is
pulled the CL is bit exact.

Bug: webrtc:9726,chromium:895338,chromium:905083
Change-Id: Ie2f9c8c5ce5b5a4510b4bdb95db2b970b57cd5d0
Reviewed-on: https://webrtc-review.googlesource.com/c/96920
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Original-Commit-Position: refs/heads/master@{#25169}
Reviewed-on: https://webrtc-review.googlesource.com/c/111160
Cr-Commit-Position: refs/branch-heads/71@{#16}
Cr-Branched-From: fb226af64dadffcb3a90a9ba42c46164f320d43a-refs/heads/master@{#25118}
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 6aa513d..59a84ba 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -25,12 +25,19 @@
 
   EchoCanceller3Config();
   EchoCanceller3Config(const EchoCanceller3Config& e);
+
+  struct Buffering {
+    bool use_new_render_buffering = true;
+    size_t excess_render_detection_interval_blocks = 250;
+    size_t max_allowed_excess_render_blocks = 8;
+  } buffering;
+
   struct Delay {
     Delay();
     Delay(const Delay& e);
     size_t default_delay = 5;
     size_t down_sampling_factor = 4;
-    size_t num_filters = 6;
+    size_t num_filters = 5;
     size_t api_call_jitter_blocks = 26;
     size_t min_echo_path_delay_blocks = 0;
     size_t delay_headroom_blocks = 2;
diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc
index 47701b5..a27f20c 100644
--- a/api/audio/echo_canceller3_config_json.cc
+++ b/api/audio/echo_canceller3_config_json.cc
@@ -126,6 +126,15 @@
   }
 
   Json::Value section;
+  if (rtc::GetValueFromJsonObject(root, "buffering", &section)) {
+    ReadParam(section, "use_new_render_buffering",
+              &cfg.buffering.use_new_render_buffering);
+    ReadParam(section, "excess_render_detection_interval_blocks",
+              &cfg.buffering.excess_render_detection_interval_blocks);
+    ReadParam(section, "max_allowed_excess_render_blocks",
+              &cfg.buffering.max_allowed_excess_render_blocks);
+  }
+
   if (rtc::GetValueFromJsonObject(aec3_root, "delay", &section)) {
     ReadParam(section, "default_delay", &cfg.delay.default_delay);
     ReadParam(section, "down_sampling_factor", &cfg.delay.down_sampling_factor);
diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn
index e631732..d1f4595 100644
--- a/modules/audio_processing/aec3/BUILD.gn
+++ b/modules/audio_processing/aec3/BUILD.gn
@@ -26,6 +26,7 @@
     "block_framer.h",
     "block_processor.cc",
     "block_processor.h",
+    "block_processor2.cc",
     "block_processor_metrics.cc",
     "block_processor_metrics.h",
     "cascaded_biquad_filter.cc",
@@ -76,8 +77,10 @@
     "render_buffer.h",
     "render_delay_buffer.cc",
     "render_delay_buffer.h",
+    "render_delay_buffer2.cc",
     "render_delay_controller.cc",
     "render_delay_controller.h",
+    "render_delay_controller2.cc",
     "render_delay_controller_metrics.cc",
     "render_delay_controller_metrics.h",
     "render_signal_analyzer.cc",
diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc
index 314d05f..a331006 100644
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -25,7 +25,7 @@
   absl::optional<DelayEstimate> delay_estimate =
       DelayEstimate(DelayEstimate::Quality::kRefined, 10);
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   std::array<float, kFftLengthBy2Plus1> E2_main = {};
   std::array<float, kFftLengthBy2Plus1> Y2 = {};
   std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
@@ -179,7 +179,7 @@
   EchoCanceller3Config config;
   AecState state(config);
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   absl::optional<DelayEstimate> delay_estimate;
   std::array<float, kFftLengthBy2Plus1> E2_main;
   std::array<float, kFftLengthBy2Plus1> Y2;
diff --git a/modules/audio_processing/aec3/block_processor.cc b/modules/audio_processing/aec3/block_processor.cc
index fbb6059..4f4020e 100644
--- a/modules/audio_processing/aec3/block_processor.cc
+++ b/modules/audio_processing/aec3/block_processor.cc
@@ -139,7 +139,7 @@
       capture_properly_started_ = false;
       render_properly_started_ = false;
 
-      RTC_LOG(LS_WARNING) << "Reset due to render buffer underrrun at block "
+      RTC_LOG(LS_WARNING) << "Reset due to render buffer underrun at block "
                           << capture_call_counter_;
     }
   } else if (render_event_ == RenderDelayBuffer::BufferingEvent::kApiCallSkew) {
diff --git a/modules/audio_processing/aec3/block_processor.h b/modules/audio_processing/aec3/block_processor.h
index a3967ea..8793a03 100644
--- a/modules/audio_processing/aec3/block_processor.h
+++ b/modules/audio_processing/aec3/block_processor.h
@@ -23,19 +23,33 @@
 // Class for performing echo cancellation on 64 sample blocks of audio data.
 class BlockProcessor {
  public:
+  // Create a block processor with the legacy render buffering.
   static BlockProcessor* Create(const EchoCanceller3Config& config,
                                 int sample_rate_hz);
+  // Create a block processor with the new render buffering.
+  static BlockProcessor* Create2(const EchoCanceller3Config& config,
+                                 int sample_rate_hz);
   // Only used for testing purposes.
   static BlockProcessor* Create(
       const EchoCanceller3Config& config,
       int sample_rate_hz,
       std::unique_ptr<RenderDelayBuffer> render_buffer);
+  static BlockProcessor* Create2(
+      const EchoCanceller3Config& config,
+      int sample_rate_hz,
+      std::unique_ptr<RenderDelayBuffer> render_buffer);
   static BlockProcessor* Create(
       const EchoCanceller3Config& config,
       int sample_rate_hz,
       std::unique_ptr<RenderDelayBuffer> render_buffer,
       std::unique_ptr<RenderDelayController> delay_controller,
       std::unique_ptr<EchoRemover> echo_remover);
+  static BlockProcessor* Create2(
+      const EchoCanceller3Config& config,
+      int sample_rate_hz,
+      std::unique_ptr<RenderDelayBuffer> render_buffer,
+      std::unique_ptr<RenderDelayController> delay_controller,
+      std::unique_ptr<EchoRemover> echo_remover);
 
   virtual ~BlockProcessor() = default;
 
diff --git a/modules/audio_processing/aec3/block_processor2.cc b/modules/audio_processing/aec3/block_processor2.cc
new file mode 100644
index 0000000..3616427
--- /dev/null
+++ b/modules/audio_processing/aec3/block_processor2.cc
@@ -0,0 +1,254 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <stddef.h>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "api/audio/echo_control.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/block_processor_metrics.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/echo_remover.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/aec3/render_delay_controller.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/atomicops.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace {
+
+enum class BlockProcessorApiCall { kCapture, kRender };
+
+class BlockProcessorImpl2 final : public BlockProcessor {
+ public:
+  BlockProcessorImpl2(const EchoCanceller3Config& config,
+                      int sample_rate_hz,
+                      std::unique_ptr<RenderDelayBuffer> render_buffer,
+                      std::unique_ptr<RenderDelayController> delay_controller,
+                      std::unique_ptr<EchoRemover> echo_remover);
+
+  BlockProcessorImpl2() = delete;
+
+  ~BlockProcessorImpl2() override;
+
+  void ProcessCapture(bool echo_path_gain_change,
+                      bool capture_signal_saturation,
+                      std::vector<std::vector<float>>* capture_block) override;
+
+  void BufferRender(const std::vector<std::vector<float>>& block) override;
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override;
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override;
+
+  void SetAudioBufferDelay(size_t delay_ms) override;
+
+ private:
+  static int instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const EchoCanceller3Config config_;
+  bool capture_properly_started_ = false;
+  bool render_properly_started_ = false;
+  const size_t sample_rate_hz_;
+  std::unique_ptr<RenderDelayBuffer> render_buffer_;
+  std::unique_ptr<RenderDelayController> delay_controller_;
+  std::unique_ptr<EchoRemover> echo_remover_;
+  BlockProcessorMetrics metrics_;
+  RenderDelayBuffer::BufferingEvent render_event_;
+  size_t capture_call_counter_ = 0;
+  absl::optional<DelayEstimate> estimated_delay_;
+  absl::optional<int> echo_remover_delay_;
+};
+
+int BlockProcessorImpl2::instance_count_ = 0;
+
+BlockProcessorImpl2::BlockProcessorImpl2(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    std::unique_ptr<RenderDelayBuffer> render_buffer,
+    std::unique_ptr<RenderDelayController> delay_controller,
+    std::unique_ptr<EchoRemover> echo_remover)
+    : data_dumper_(
+          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
+      config_(config),
+      sample_rate_hz_(sample_rate_hz),
+      render_buffer_(std::move(render_buffer)),
+      delay_controller_(std::move(delay_controller)),
+      echo_remover_(std::move(echo_remover)),
+      render_event_(RenderDelayBuffer::BufferingEvent::kNone) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
+}
+
+BlockProcessorImpl2::~BlockProcessorImpl2() = default;
+
+void BlockProcessorImpl2::ProcessCapture(
+    bool echo_path_gain_change,
+    bool capture_signal_saturation,
+    std::vector<std::vector<float>>* capture_block) {
+  RTC_DCHECK(capture_block);
+  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->size());
+  RTC_DCHECK_EQ(kBlockSize, (*capture_block)[0].size());
+
+  capture_call_counter_++;
+
+  data_dumper_->DumpRaw("aec3_processblock_call_order",
+                        static_cast<int>(BlockProcessorApiCall::kCapture));
+  data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize,
+                        &(*capture_block)[0][0],
+                        LowestBandRate(sample_rate_hz_), 1);
+
+  if (render_properly_started_) {
+    if (!capture_properly_started_) {
+      capture_properly_started_ = true;
+      render_buffer_->Reset();
+      delay_controller_->Reset(true);
+    }
+  } else {
+    // If no render data has yet arrived, do not process the capture signal.
+    return;
+  }
+
+  EchoPathVariability echo_path_variability(
+      echo_path_gain_change, EchoPathVariability::DelayAdjustment::kNone,
+      false);
+
+  if (render_event_ == RenderDelayBuffer::BufferingEvent::kRenderOverrun &&
+      render_properly_started_) {
+    echo_path_variability.delay_change =
+        EchoPathVariability::DelayAdjustment::kBufferFlush;
+    delay_controller_->Reset(true);
+    RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block  "
+                        << capture_call_counter_;
+  }
+  render_event_ = RenderDelayBuffer::BufferingEvent::kNone;
+
+  // Update the render buffers with any newly arrived render blocks and prepare
+  // the render buffers for reading the render data corresponding to the current
+  // capture block.
+  RenderDelayBuffer::BufferingEvent buffer_event =
+      render_buffer_->PrepareCaptureProcessing();
+  // Reset the delay controller at render buffer underrun.
+  if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) {
+    delay_controller_->Reset(false);
+  }
+
+  data_dumper_->DumpWav("aec3_processblock_capture_input2", kBlockSize,
+                        &(*capture_block)[0][0],
+                        LowestBandRate(sample_rate_hz_), 1);
+
+  // Compute and apply the render delay required to achieve proper signal
+  // alignment.
+  estimated_delay_ = delay_controller_->GetDelay(
+      render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
+      echo_remover_delay_, (*capture_block)[0]);
+
+  if (estimated_delay_) {
+    bool delay_change = render_buffer_->SetDelay(estimated_delay_->delay);
+    if (delay_change) {
+      RTC_LOG(LS_WARNING) << "Delay changed to " << estimated_delay_->delay
+                          << " at block " << capture_call_counter_;
+      echo_path_variability.delay_change =
+          EchoPathVariability::DelayAdjustment::kNewDetectedDelay;
+    }
+  }
+
+  // Remove the echo from the capture signal.
+  echo_remover_->ProcessCapture(
+      echo_path_variability, capture_signal_saturation, estimated_delay_,
+      render_buffer_->GetRenderBuffer(), capture_block);
+
+  // Check to see if a refined delay estimate has been obtained from the echo
+  // remover.
+  echo_remover_delay_ = echo_remover_->Delay();
+
+  // Update the metrics.
+  metrics_.UpdateCapture(false);
+}
+
+void BlockProcessorImpl2::BufferRender(
+    const std::vector<std::vector<float>>& block) {
+  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.size());
+  RTC_DCHECK_EQ(kBlockSize, block[0].size());
+  data_dumper_->DumpRaw("aec3_processblock_call_order",
+                        static_cast<int>(BlockProcessorApiCall::kRender));
+  data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize,
+                        &block[0][0], LowestBandRate(sample_rate_hz_), 1);
+  data_dumper_->DumpWav("aec3_processblock_render_input2", kBlockSize,
+                        &block[0][0], LowestBandRate(sample_rate_hz_), 1);
+
+  render_event_ = render_buffer_->Insert(block);
+
+  metrics_.UpdateRender(render_event_ !=
+                        RenderDelayBuffer::BufferingEvent::kNone);
+
+  render_properly_started_ = true;
+  delay_controller_->LogRenderCall();
+}
+
+void BlockProcessorImpl2::UpdateEchoLeakageStatus(bool leakage_detected) {
+  echo_remover_->UpdateEchoLeakageStatus(leakage_detected);
+}
+
+void BlockProcessorImpl2::GetMetrics(EchoControl::Metrics* metrics) const {
+  echo_remover_->GetMetrics(metrics);
+  const int block_size_ms = sample_rate_hz_ == 8000 ? 8 : 4;
+  absl::optional<size_t> delay = render_buffer_->Delay();
+  metrics->delay_ms = delay ? static_cast<int>(*delay) * block_size_ms : 0;
+}
+
+void BlockProcessorImpl2::SetAudioBufferDelay(size_t delay_ms) {
+  render_buffer_->SetAudioBufferDelay(delay_ms);
+}
+
+}  // namespace
+
+BlockProcessor* BlockProcessor::Create2(const EchoCanceller3Config& config,
+                                        int sample_rate_hz) {
+  std::unique_ptr<RenderDelayBuffer> render_buffer(
+      RenderDelayBuffer::Create2(config, NumBandsForRate(sample_rate_hz)));
+  std::unique_ptr<RenderDelayController> delay_controller(
+      RenderDelayController::Create2(config, sample_rate_hz));
+  std::unique_ptr<EchoRemover> echo_remover(
+      EchoRemover::Create(config, sample_rate_hz));
+  return Create2(config, sample_rate_hz, std::move(render_buffer),
+                 std::move(delay_controller), std::move(echo_remover));
+}
+
+BlockProcessor* BlockProcessor::Create2(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    std::unique_ptr<RenderDelayBuffer> render_buffer) {
+  std::unique_ptr<RenderDelayController> delay_controller(
+      RenderDelayController::Create2(config, sample_rate_hz));
+  std::unique_ptr<EchoRemover> echo_remover(
+      EchoRemover::Create(config, sample_rate_hz));
+  return Create2(config, sample_rate_hz, std::move(render_buffer),
+                 std::move(delay_controller), std::move(echo_remover));
+}
+
+BlockProcessor* BlockProcessor::Create2(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz,
+    std::unique_ptr<RenderDelayBuffer> render_buffer,
+    std::unique_ptr<RenderDelayController> delay_controller,
+    std::unique_ptr<EchoRemover> echo_remover) {
+  return new BlockProcessorImpl2(
+      config, sample_rate_hz, std::move(render_buffer),
+      std::move(delay_controller), std::move(echo_remover));
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/aec3/block_processor_unittest.cc b/modules/audio_processing/aec3/block_processor_unittest.cc
index 7145786..8aba5b5 100644
--- a/modules/audio_processing/aec3/block_processor_unittest.cc
+++ b/modules/audio_processing/aec3/block_processor_unittest.cc
@@ -37,7 +37,7 @@
 // methods are callable.
 void RunBasicSetupAndApiCallTest(int sample_rate_hz, int num_iterations) {
   std::unique_ptr<BlockProcessor> block_processor(
-      BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz));
+      BlockProcessor::Create2(EchoCanceller3Config(), sample_rate_hz));
   std::vector<std::vector<float>> block(NumBandsForRate(sample_rate_hz),
                                         std::vector<float>(kBlockSize, 1000.f));
 
@@ -51,7 +51,7 @@
 #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
 void RunRenderBlockSizeVerificationTest(int sample_rate_hz) {
   std::unique_ptr<BlockProcessor> block_processor(
-      BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz));
+      BlockProcessor::Create2(EchoCanceller3Config(), sample_rate_hz));
   std::vector<std::vector<float>> block(
       NumBandsForRate(sample_rate_hz), std::vector<float>(kBlockSize - 1, 0.f));
 
@@ -60,7 +60,7 @@
 
 void RunCaptureBlockSizeVerificationTest(int sample_rate_hz) {
   std::unique_ptr<BlockProcessor> block_processor(
-      BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz));
+      BlockProcessor::Create2(EchoCanceller3Config(), sample_rate_hz));
   std::vector<std::vector<float>> block(
       NumBandsForRate(sample_rate_hz), std::vector<float>(kBlockSize - 1, 0.f));
 
@@ -72,7 +72,7 @@
                                      ? NumBandsForRate(sample_rate_hz) + 1
                                      : 1;
   std::unique_ptr<BlockProcessor> block_processor(
-      BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz));
+      BlockProcessor::Create2(EchoCanceller3Config(), sample_rate_hz));
   std::vector<std::vector<float>> block(wrong_num_bands,
                                         std::vector<float>(kBlockSize, 0.f));
 
@@ -84,7 +84,7 @@
                                      ? NumBandsForRate(sample_rate_hz) + 1
                                      : 1;
   std::unique_ptr<BlockProcessor> block_processor(
-      BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz));
+      BlockProcessor::Create2(EchoCanceller3Config(), sample_rate_hz));
   std::vector<std::vector<float>> block(wrong_num_bands,
                                         std::vector<float>(kBlockSize, 0.f));
 
@@ -124,7 +124,7 @@
     EXPECT_CALL(*render_delay_buffer_mock, Delay())
         .Times(kNumBlocks + 1)
         .WillRepeatedly(Return(0));
-    std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create(
+    std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create2(
         EchoCanceller3Config(), rate, std::move(render_delay_buffer_mock)));
 
     std::vector<std::vector<float>> render_block(
@@ -173,7 +173,7 @@
     EXPECT_CALL(*echo_remover_mock, UpdateEchoLeakageStatus(_))
         .Times(kNumBlocks);
 
-    std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create(
+    std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create2(
         EchoCanceller3Config(), rate, std::move(render_delay_buffer_mock),
         std::move(render_delay_controller_mock), std::move(echo_remover_mock)));
 
@@ -239,7 +239,7 @@
 // Verifiers that the verification for null ProcessCapture input works.
 TEST(BlockProcessor, NullProcessCaptureParameter) {
   EXPECT_DEATH(std::unique_ptr<BlockProcessor>(
-                   BlockProcessor::Create(EchoCanceller3Config(), 8000))
+                   BlockProcessor::Create2(EchoCanceller3Config(), 8000))
                    ->ProcessCapture(false, false, nullptr),
                "");
 }
@@ -249,7 +249,7 @@
 // tests on test bots has been fixed.
 TEST(BlockProcessor, DISABLED_WrongSampleRate) {
   EXPECT_DEATH(std::unique_ptr<BlockProcessor>(
-                   BlockProcessor::Create(EchoCanceller3Config(), 8001)),
+                   BlockProcessor::Create2(EchoCanceller3Config(), 8001)),
                "");
 }
 
diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc
index 940a339..857587a 100644
--- a/modules/audio_processing/aec3/echo_canceller3.cc
+++ b/modules/audio_processing/aec3/echo_canceller3.cc
@@ -90,6 +90,10 @@
   return !field_trial::IsEnabled("WebRTC-Aec3EarlyDelayDetectionKillSwitch");
 }
 
+bool EnableNewRenderBuffering() {
+  return !field_trial::IsEnabled("WebRTC-Aec3NewRenderBufferingKillSwitch");
+}
+
 // Method for adjusting config parameter dependencies..
 EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
   EchoCanceller3Config adjusted_cfg = config;
@@ -104,6 +108,13 @@
         std::min(adjusted_cfg.delay.num_filters, static_cast<size_t>(5));
   }
 
+  bool use_new_render_buffering =
+      EnableNewRenderBuffering() && config.buffering.use_new_render_buffering;
+  // Old render buffering needs one more filter to cover the same delay.
+  if (!use_new_render_buffering) {
+    adjusted_cfg.delay.num_filters += 1;
+  }
+
   if (EnableReverbBasedOnRender() == false) {
     adjusted_cfg.ep_strength.reverb_based_on_render = false;
   }
@@ -370,12 +381,16 @@
 EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                                int sample_rate_hz,
                                bool use_highpass_filter)
-    : EchoCanceller3(
-          AdjustConfig(config),
-          sample_rate_hz,
-          use_highpass_filter,
-          std::unique_ptr<BlockProcessor>(
-              BlockProcessor::Create(AdjustConfig(config), sample_rate_hz))) {}
+    : EchoCanceller3(AdjustConfig(config),
+                     sample_rate_hz,
+                     use_highpass_filter,
+                     std::unique_ptr<BlockProcessor>(
+                         EnableNewRenderBuffering() &&
+                                 config.buffering.use_new_render_buffering
+                             ? BlockProcessor::Create2(AdjustConfig(config),
+                                                       sample_rate_hz)
+                             : BlockProcessor::Create(AdjustConfig(config),
+                                                      sample_rate_hz))) {}
 EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                                int sample_rate_hz,
                                bool use_highpass_filter,
diff --git a/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc b/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
index 311a4a2..a4e3133 100644
--- a/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
@@ -39,7 +39,7 @@
   ApmDataDumper data_dumper(0);
   EchoCanceller3Config config;
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   EchoPathDelayEstimator estimator(&data_dumper, config);
   std::vector<std::vector<float>> render(3, std::vector<float>(kBlockSize));
   std::vector<float> capture(kBlockSize);
@@ -64,12 +64,9 @@
     config.delay.num_filters = 10;
     for (size_t delay_samples : {30, 64, 150, 200, 800, 4000}) {
       SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor));
-
-      config.delay.api_call_jitter_blocks = 5;
       std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-          RenderDelayBuffer::Create(config, 3));
-      DelayBuffer<float> signal_delay_buffer(
-          delay_samples + 2 * config.delay.api_call_jitter_blocks * 64);
+          RenderDelayBuffer::Create2(config, 3));
+      DelayBuffer<float> signal_delay_buffer(delay_samples);
       EchoPathDelayEstimator estimator(&data_dumper, config);
 
       absl::optional<DelayEstimate> estimated_delay_samples;
@@ -97,9 +94,7 @@
         // domain.
         size_t delay_ds = delay_samples / down_sampling_factor;
         size_t estimated_delay_ds =
-            (estimated_delay_samples->delay -
-             (config.delay.api_call_jitter_blocks + 1) * 64) /
-            down_sampling_factor;
+            estimated_delay_samples->delay / down_sampling_factor;
         EXPECT_NEAR(delay_ds, estimated_delay_ds, 1);
       } else {
         ADD_FAILURE();
@@ -118,7 +113,7 @@
   ApmDataDumper data_dumper(0);
   EchoPathDelayEstimator estimator(&data_dumper, config);
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(EchoCanceller3Config(), 3));
+      RenderDelayBuffer::Create2(EchoCanceller3Config(), 3));
   for (size_t k = 0; k < 100; ++k) {
     RandomizeSampleVector(&random_generator, render[0]);
     for (auto& render_k : render[0]) {
@@ -142,7 +137,7 @@
   EchoCanceller3Config config;
   EchoPathDelayEstimator estimator(&data_dumper, config);
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   std::vector<float> capture(kBlockSize);
   EXPECT_DEATH(estimator.EstimateDelay(
                    render_delay_buffer->GetDownsampledRenderBuffer(), capture),
@@ -157,7 +152,7 @@
   EchoCanceller3Config config;
   EchoPathDelayEstimator estimator(&data_dumper, config);
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   std::vector<float> capture(std::vector<float>(kBlockSize - 1));
   EXPECT_DEATH(estimator.EstimateDelay(
                    render_delay_buffer->GetDownsampledRenderBuffer(), capture),
diff --git a/modules/audio_processing/aec3/echo_remover_unittest.cc b/modules/audio_processing/aec3/echo_remover_unittest.cc
index da03f4c..8bf76c4 100644
--- a/modules/audio_processing/aec3/echo_remover_unittest.cc
+++ b/modules/audio_processing/aec3/echo_remover_unittest.cc
@@ -48,7 +48,7 @@
     SCOPED_TRACE(ProduceDebugText(rate));
     std::unique_ptr<EchoRemover> remover(
         EchoRemover::Create(EchoCanceller3Config(), rate));
-    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create(
+    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create2(
         EchoCanceller3Config(), NumBandsForRate(rate)));
 
     std::vector<std::vector<float>> render(NumBandsForRate(rate),
@@ -89,7 +89,7 @@
     SCOPED_TRACE(ProduceDebugText(rate));
     std::unique_ptr<EchoRemover> remover(
         EchoRemover::Create(EchoCanceller3Config(), rate));
-    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create(
+    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create2(
         EchoCanceller3Config(), NumBandsForRate(rate)));
     std::vector<std::vector<float>> capture(
         NumBandsForRate(rate), std::vector<float>(kBlockSize - 1, 0.f));
@@ -111,7 +111,7 @@
     SCOPED_TRACE(ProduceDebugText(rate));
     std::unique_ptr<EchoRemover> remover(
         EchoRemover::Create(EchoCanceller3Config(), rate));
-    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create(
+    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create2(
         EchoCanceller3Config(), NumBandsForRate(rate)));
     std::vector<std::vector<float>> capture(
         NumBandsForRate(rate == 48000 ? 16000 : rate + 16000),
@@ -131,7 +131,7 @@
   std::unique_ptr<EchoRemover> remover(
       EchoRemover::Create(EchoCanceller3Config(), 8000));
   std::unique_ptr<RenderDelayBuffer> render_buffer(
-      RenderDelayBuffer::Create(EchoCanceller3Config(), 3));
+      RenderDelayBuffer::Create2(EchoCanceller3Config(), 3));
   EchoPathVariability echo_path_variability(
       false, EchoPathVariability::DelayAdjustment::kNone, false);
   EXPECT_DEATH(
@@ -161,7 +161,7 @@
       config.delay.min_echo_path_delay_blocks = 0;
       std::unique_ptr<EchoRemover> remover(EchoRemover::Create(config, rate));
       std::unique_ptr<RenderDelayBuffer> render_buffer(
-          RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+          RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
       render_buffer->SetDelay(delay_samples / kBlockSize);
 
       std::vector<std::unique_ptr<DelayBuffer<float>>> delay_buffers(x.size());
diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 8c44ae0..093b194 100644
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -66,7 +66,7 @@
   config.delay.min_echo_path_delay_blocks = 0;
   config.delay.default_delay = 1;
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   AecState aec_state(config);
   RenderSignalAnalyzer render_signal_analyzer(config);
   absl::optional<DelayEstimate> delay_estimate;
diff --git a/modules/audio_processing/aec3/matched_filter_unittest.cc b/modules/audio_processing/aec3/matched_filter_unittest.cc
index 16b603a..0c17118 100644
--- a/modules/audio_processing/aec3/matched_filter_unittest.cc
+++ b/modules/audio_processing/aec3/matched_filter_unittest.cc
@@ -165,7 +165,7 @@
                            config.delay.delay_candidate_detection_threshold);
 
       std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-          RenderDelayBuffer::Create(config, 3));
+          RenderDelayBuffer::Create2(config, 3));
 
       // Analyze the correlation between render and capture.
       for (size_t k = 0; k < (600 + delay_samples / sub_block_size); ++k) {
@@ -261,7 +261,7 @@
     std::fill(capture.begin(), capture.end(), 0.f);
     ApmDataDumper data_dumper(0);
     std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-        RenderDelayBuffer::Create(config, 3));
+        RenderDelayBuffer::Create2(config, 3));
     MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size,
                          kWindowSizeSubBlocks, kNumMatchedFilters,
                          kAlignmentShiftSubBlocks, 150,
@@ -306,7 +306,7 @@
                          config.delay.delay_estimate_smoothing,
                          config.delay.delay_candidate_detection_threshold);
     std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-        RenderDelayBuffer::Create(config, 3));
+        RenderDelayBuffer::Create2(EchoCanceller3Config(), 3));
     Decimator capture_decimator(down_sampling_factor);
 
     // Analyze the correlation between render and capture.
diff --git a/modules/audio_processing/aec3/render_delay_buffer.h b/modules/audio_processing/aec3/render_delay_buffer.h
index a6d6874..8c5667e 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.h
+++ b/modules/audio_processing/aec3/render_delay_buffer.h
@@ -33,12 +33,13 @@
     kNone,
     kRenderUnderrun,
     kRenderOverrun,
-    kApiCallSkew,
-    kRenderDataLost
+    kApiCallSkew
   };
 
   static RenderDelayBuffer* Create(const EchoCanceller3Config& config,
                                    size_t num_bands);
+  static RenderDelayBuffer* Create2(const EchoCanceller3Config& config,
+                                    size_t num_bands);
   virtual ~RenderDelayBuffer() = default;
 
   // Resets the buffer alignment.
diff --git a/modules/audio_processing/aec3/render_delay_buffer2.cc b/modules/audio_processing/aec3/render_delay_buffer2.cc
new file mode 100644
index 0000000..6992c5b
--- /dev/null
+++ b/modules/audio_processing/aec3/render_delay_buffer2.cc
@@ -0,0 +1,453 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+#include <algorithm>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/aec3/fft_buffer.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/matrix_buffer.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/aec3/vector_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/atomicops.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace {
+
+class RenderDelayBufferImpl2 final : public RenderDelayBuffer {
+ public:
+  RenderDelayBufferImpl2(const EchoCanceller3Config& config, size_t num_bands);
+  RenderDelayBufferImpl2() = delete;
+  ~RenderDelayBufferImpl2() override;
+
+  void Reset() override;
+  BufferingEvent Insert(const std::vector<std::vector<float>>& block) override;
+  BufferingEvent PrepareCaptureProcessing() override;
+  bool SetDelay(size_t delay) override;
+  size_t Delay() const override { return ComputeDelay(); }
+  size_t MaxDelay() const override {
+    return blocks_.buffer.size() - 1 - buffer_headroom_;  // Keeps headroom.
+  }
+  RenderBuffer* GetRenderBuffer() override { return &echo_remover_buffer_; }
+
+  const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const override {
+    return low_rate_;
+  }
+
+  int BufferLatency() const;  // Unread low-rate data, in sub-blocks.
+  bool CausalDelay(size_t delay) const override;
+  void SetAudioBufferDelay(size_t delay_ms) override;
+
+ private:
+  static int instance_count_;  // Source of per-instance dumper indices.
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const Aec3Optimization optimization_;
+  const EchoCanceller3Config config_;
+  size_t down_sampling_factor_;
+  const int sub_block_size_;  // Length of one decimated block.
+  MatrixBuffer blocks_;  // Time-domain render blocks, all bands.
+  VectorBuffer spectra_;  // Spectra computed from ffts_.
+  FftBuffer ffts_;  // FFTs computed from band 0.
+  absl::optional<size_t> delay_;  // Set by SetDelay() and Reset().
+  RenderBuffer echo_remover_buffer_;  // Wraps blocks_, spectra_ and ffts_.
+  DownsampledRenderBuffer low_rate_;  // Decimated band-0 render signal.
+  Decimator render_decimator_;
+  const Aec3Fft fft_;
+  std::vector<float> render_ds_;  // Scratch output of the decimator.
+  const int buffer_headroom_;  // The main filter length in blocks.
+  bool last_call_was_render_ = false;
+  int num_api_calls_in_a_row_ = 0;  // Current run of same-side API calls.
+  int max_observed_jitter_ = 1;  // Longest such run observed.
+  size_t capture_call_counter_ = 0;
+  size_t render_call_counter_ = 0;
+  bool render_activity_ = false;
+  size_t render_activity_counter_ = 0;  // Blocks with active render.
+  absl::optional<size_t> external_audio_buffer_delay_;  // In blocks.
+  bool external_audio_buffer_delay_verified_after_reset_ = false;
+  size_t min_latency_blocks_ = 0;  // Minimum latency in the interval.
+  size_t excess_render_detection_counter_ = 0;
+  size_t num_bands_;
+
+  int MapDelayToTotalDelay(size_t delay) const;
+  int ComputeDelay() const;
+  void ApplyTotalDelay(int delay);
+  void InsertBlock(const std::vector<std::vector<float>>& block,
+                   int previous_write);
+  bool DetectActiveRender(rtc::ArrayView<const float> x) const;
+  bool DetectExcessRenderBlocks();
+  void IncrementWriteIndices();
+  void IncrementLowRateReadIndices();
+  void IncrementReadIndices();
+  bool RenderOverrun();
+  bool RenderUnderrun();
+};
+
+int RenderDelayBufferImpl2::instance_count_ = 0;
+
+RenderDelayBufferImpl2::RenderDelayBufferImpl2(
+    const EchoCanceller3Config& config,
+    size_t num_bands)
+    : data_dumper_(  // Each instance gets its own dumper index.
+          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
+      optimization_(DetectOptimization()),
+      config_(config),
+      down_sampling_factor_(config.delay.down_sampling_factor),
+      sub_block_size_(static_cast<int>(down_sampling_factor_ > 0
+                                           ? kBlockSize / down_sampling_factor_
+                                           : kBlockSize)),
+      blocks_(GetRenderDelayBufferSize(down_sampling_factor_,
+                                       config.delay.num_filters,
+                                       config.filter.main.length_blocks),
+              num_bands,
+              kBlockSize),
+      spectra_(blocks_.buffer.size(), kFftLengthBy2Plus1),
+      ffts_(blocks_.buffer.size()),
+      delay_(config_.delay.default_delay),  // Cleared by Reset() below.
+      echo_remover_buffer_(&blocks_, &spectra_, &ffts_),
+      low_rate_(GetDownSampledBufferSize(down_sampling_factor_,
+                                         config.delay.num_filters)),
+      render_decimator_(down_sampling_factor_),
+      fft_(),
+      render_ds_(sub_block_size_, 0.f),
+      buffer_headroom_(config.filter.main.length_blocks),
+      num_bands_(num_bands) {
+  RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
+  RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
+
+  Reset();  // Applies the default delay to the read indices.
+}
+
+RenderDelayBufferImpl2::~RenderDelayBufferImpl2() = default;
+
+// Resets jitter/excess-render tracking and realigns the read indices.
+void RenderDelayBufferImpl2::Reset() {
+  last_call_was_render_ = false;
+  num_api_calls_in_a_row_ = 1;
+  min_latency_blocks_ = 0;
+  excess_render_detection_counter_ = 0;
+
+  // Initialize the read index to one sub-block before the write index.
+  low_rate_.read = low_rate_.OffsetIndex(low_rate_.write, sub_block_size_);
+
+  // Check for any external audio buffer delay and whether it is feasible.
+  if (external_audio_buffer_delay_) {
+    const size_t headroom = 2;
+    size_t audio_buffer_delay_to_set;
+    // Minimum delay is 1 (like the low-rate render buffer).
+    if (*external_audio_buffer_delay_ <= headroom) {
+      audio_buffer_delay_to_set = 1;
+    } else {
+      audio_buffer_delay_to_set = *external_audio_buffer_delay_ - headroom;
+    }
+
+    audio_buffer_delay_to_set = std::min(audio_buffer_delay_to_set, MaxDelay());
+
+    // When an external delay estimate is available, use that delay as the
+    // initial render buffer delay.
+    ApplyTotalDelay(audio_buffer_delay_to_set);
+    delay_ = ComputeDelay();
+
+    external_audio_buffer_delay_verified_after_reset_ = false;
+  } else {
+    // If an external delay estimate is not available, set the render buffer
+    // delays to the default delay.
+    ApplyTotalDelay(config_.delay.default_delay);
+
+    // Unset the delays which are set by SetDelay.
+    delay_ = absl::nullopt;
+  }
+}
+
+// Inserts a new block into the render buffers; resets on render overrun.
+RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl2::Insert(
+    const std::vector<std::vector<float>>& block) {
+  ++render_call_counter_;
+  if (delay_) {  // Call-order jitter is only tracked once a delay is set.
+    if (!last_call_was_render_) {
+      last_call_was_render_ = true;
+      num_api_calls_in_a_row_ = 1;
+    } else {
+      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
+        max_observed_jitter_ = num_api_calls_in_a_row_;
+        RTC_LOG(LS_WARNING)
+            << "New max number api jitter observed at render block "
+            << render_call_counter_ << ":  " << num_api_calls_in_a_row_
+            << " blocks";
+      }
+    }
+  }
+
+  // Increase the write indices to where the new blocks should be written.
+  const int previous_write = blocks_.write;
+  IncrementWriteIndices();
+
+  // Allow overrun and do a reset when render overrun occurs due to more render
+  // data being inserted than capture data is received.
+  BufferingEvent event =
+      RenderOverrun() ? BufferingEvent::kRenderOverrun : BufferingEvent::kNone;
+
+  // Detect and update render activity.
+  if (!render_activity_) {
+    render_activity_counter_ += DetectActiveRender(block[0]) ? 1 : 0;
+    render_activity_ = render_activity_counter_ >= 20;  // 20 active blocks.
+  }
+
+  // Insert the new render block into the specified position.
+  InsertBlock(block, previous_write);
+
+  if (event != BufferingEvent::kNone) {
+    Reset();  // The block above is still stored before the realignment.
+  }
+
+  return event;
+}
+
+// Prepares the render buffers for processing another capture block.
+RenderDelayBuffer::BufferingEvent
+RenderDelayBufferImpl2::PrepareCaptureProcessing() {
+  RenderDelayBuffer::BufferingEvent event = BufferingEvent::kNone;
+  ++capture_call_counter_;
+
+  if (delay_) {  // Call-order jitter is only tracked once a delay is set.
+    if (last_call_was_render_) {
+      last_call_was_render_ = false;
+      num_api_calls_in_a_row_ = 1;
+    } else {
+      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
+        max_observed_jitter_ = num_api_calls_in_a_row_;
+        RTC_LOG(LS_WARNING)
+            << "New max number api jitter observed at capture block "
+            << capture_call_counter_ << ":  " << num_api_calls_in_a_row_
+            << " blocks";
+      }
+    }
+  }
+
+  if (DetectExcessRenderBlocks()) {
+    // Too many render blocks compared to capture blocks. Risk of delay ending
+    // up before the filter used by the delay estimator.
+    RTC_LOG(LS_WARNING) << "Excess render blocks detected at block "
+                        << capture_call_counter_;
+    Reset();
+    event = BufferingEvent::kRenderOverrun;  // Reported as an overrun.
+  } else if (RenderUnderrun()) {
+    // Don't increment the read indices of the low rate buffer if there is a
+    // render underrun.
+    RTC_LOG(LS_WARNING) << "Render buffer underrun detected at block "
+                        << capture_call_counter_;
+    IncrementReadIndices();
+    // Incrementing the buffer index without increasing the low rate buffer
+    // index means that the delay is reduced by one.
+    if (delay_ && *delay_ > 0)
+      delay_ = *delay_ - 1;
+    event = BufferingEvent::kRenderUnderrun;
+  } else {
+    // Increment the read indices in the render buffers to point to the most
+    // recent block to use in the capture processing.
+    IncrementLowRateReadIndices();
+    IncrementReadIndices();
+  }
+
+  echo_remover_buffer_.SetRenderActivity(render_activity_);
+  if (render_activity_) {  // Restart the activity detection once reported.
+    render_activity_counter_ = 0;
+    render_activity_ = false;
+  }
+
+  return event;
+}
+
+// Sets the delay and returns a bool indicating whether the delay was changed.
+bool RenderDelayBufferImpl2::SetDelay(size_t delay) {
+  if (!external_audio_buffer_delay_verified_after_reset_ &&
+      external_audio_buffer_delay_ && delay_) {
+    int difference = static_cast<int>(delay) - static_cast<int>(*delay_);
+    RTC_LOG(LS_WARNING) << "Mismatch between first estimated delay after reset "
+                           "and externally reported audio buffer delay: "
+                        << difference << " blocks";
+    external_audio_buffer_delay_verified_after_reset_ = true;  // Log once.
+  }
+  if (delay_ && *delay_ == delay) {  // Unchanged delay: nothing to apply.
+    return false;
+  }
+  delay_ = delay;
+
+  // Compute the total delay and limit the delay to the allowed range.
+  int total_delay = MapDelayToTotalDelay(*delay_);
+  total_delay =
+      std::min(MaxDelay(), static_cast<size_t>(std::max(total_delay, 0)));
+
+  // Apply the delay to the buffers.
+  ApplyTotalDelay(total_delay);
+  return true;
+}
+
+// Returns whether the specified delay is causal; always true here.
+bool RenderDelayBufferImpl2::CausalDelay(size_t delay) const {
+  // TODO(gustaf): Remove this from RenderDelayBuffer.
+  return true;
+}
+
+void RenderDelayBufferImpl2::SetAudioBufferDelay(size_t delay_ms) {
+  if (!external_audio_buffer_delay_) {  // Only the first report is logged.
+    RTC_LOG(LS_WARNING)
+        << "Receiving a first externally reported audio buffer delay of "
+        << delay_ms << " ms.";
+  }
+
+  // Convert ms to blocks (rounded down). NOTE(review): confirm shift amounts.
+  external_audio_buffer_delay_ = delay_ms >> ((num_bands_ == 1) ? 1 : 2);
+}
+
+// Maps the externally computed delay to the delay used internally.
+int RenderDelayBufferImpl2::MapDelayToTotalDelay(
+    size_t external_delay_blocks) const {
+  // The internal delay additionally includes the current buffer latency.
+  return BufferLatency() + static_cast<int>(external_delay_blocks);
+}
+
+// Returns the delay (not including call jitter).
+int RenderDelayBufferImpl2::ComputeDelay() const {
+  // Circular distance from the spectra write index to its read index.
+  int distance = spectra_.read - spectra_.write;
+  if (distance < 0) {
+    distance += spectra_.size;
+  }
+  return distance - BufferLatency();
+}
+
+// Places each read index `delay` blocks away from its write index.
+void RenderDelayBufferImpl2::ApplyTotalDelay(int delay) {
+  RTC_LOG(LS_WARNING) << "Applying total delay of " << delay << " blocks.";
+  blocks_.read = blocks_.OffsetIndex(blocks_.write, -delay);
+  spectra_.read = spectra_.OffsetIndex(spectra_.write, delay);
+  ffts_.read = ffts_.OffsetIndex(ffts_.write, delay);  // Sign as spectra_.
+}
+
+// Stores `block` at the write positions and updates the band-0 FFT/spectrum.
+void RenderDelayBufferImpl2::InsertBlock(
+    const std::vector<std::vector<float>>& block,
+    int previous_write) {
+  auto& b = blocks_;
+  auto& lr = low_rate_;
+  auto& ds = render_ds_;
+  auto& f = ffts_;
+  auto& s = spectra_;
+  RTC_DCHECK_EQ(block.size(), b.buffer[b.write].size());
+  for (size_t k = 0; k < block.size(); ++k) {  // Copy every band.
+    RTC_DCHECK_EQ(block[k].size(), b.buffer[b.write][k].size());
+    std::copy(block[k].begin(), block[k].end(), b.buffer[b.write][k].begin());
+  }
+
+  data_dumper_->DumpWav("aec3_render_decimator_input", block[0].size(),
+                        block[0].data(), 16000, 1);
+  render_decimator_.Decimate(block[0], ds);  // Band 0 only.
+  data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
+                        16000 / down_sampling_factor_, 1);
+  std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
+  fft_.PaddedFft(block[0], b.buffer[previous_write][0], &f.buffer[f.write]);
+  f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write]);
+}
+
+bool RenderDelayBufferImpl2::DetectActiveRender(
+    rtc::ArrayView<const float> x) const {
+  const auto limit = config_.render_levels.active_render_limit;
+  const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+  // Compares the block energy against the squared limit scaled by the length.
+  return x_energy > (limit * limit) * kFftLengthBy2;
+}
+
+bool RenderDelayBufferImpl2::DetectExcessRenderBlocks() {
+  bool excess_render_detected = false;
+  const size_t latency_blocks = static_cast<size_t>(BufferLatency());
+  // Running minimum; it is 0 after Reset(), so that interval never triggers.
+  min_latency_blocks_ = std::min(min_latency_blocks_, latency_blocks);
+  // After processing a configurable number of blocks the minimum latency is
+  // checked.
+  if (++excess_render_detection_counter_ >=
+      config_.buffering.excess_render_detection_interval_blocks) {
+    // If the minimum latency is higher than the threshold there have been
+    // more render than capture frames.
+    excess_render_detected = min_latency_blocks_ >
+                             config_.buffering.max_allowed_excess_render_blocks;
+    // Reset the counter and let the minimum latency be the current latency.
+    min_latency_blocks_ = latency_blocks;
+    excess_render_detection_counter_ = 0;
+  }
+
+  data_dumper_->DumpRaw("aec3_latency_blocks", latency_blocks);
+  data_dumper_->DumpRaw("aec3_min_latency_blocks", min_latency_blocks_);
+  data_dumper_->DumpRaw("aec3_excess_render_detected", excess_render_detected);
+  return excess_render_detected;
+}
+
+// Computes the latency in the buffer (the number of unread sub-blocks).
+int RenderDelayBufferImpl2::BufferLatency() const {
+  const size_t capacity = low_rate_.buffer.size();
+  // Circular distance from the write index to the read index, in samples.
+  const int unread = (capacity + low_rate_.read - low_rate_.write) % capacity;
+  return unread / sub_block_size_;
+}
+
+// Steps every write index to the position of the next block to insert.
+void RenderDelayBufferImpl2::IncrementWriteIndices() {
+  low_rate_.UpdateWriteIndex(-sub_block_size_);  // One sub-block per call.
+  blocks_.IncWriteIndex();
+  spectra_.DecWriteIndex();  // spectra_ and ffts_ step with Dec*.
+  ffts_.DecWriteIndex();
+}
+
+// Steps the low rate buffer read index by one sub-block.
+void RenderDelayBufferImpl2::IncrementLowRateReadIndices() {
+  low_rate_.UpdateReadIndex(-sub_block_size_);  // Same step as the write side.
+}
+
+// Increments the read indices for the render buffers.
+void RenderDelayBufferImpl2::IncrementReadIndices() {
+  // Leave all indices in place once the block read index equals the write.
+  if (blocks_.read == blocks_.write) return;
+  blocks_.IncReadIndex();
+  spectra_.DecReadIndex();
+  ffts_.DecReadIndex();
+}
+
+// Checks for a render buffer overrun: a read index equals its write index.
+bool RenderDelayBufferImpl2::RenderOverrun() {
+  return low_rate_.read == low_rate_.write || blocks_.read == blocks_.write;
+}
+
+// Checks for a render buffer underrun (no unread low rate data).
+bool RenderDelayBufferImpl2::RenderUnderrun() {
+  return low_rate_.read == low_rate_.write;
+}
+
+}  // namespace
+
+RenderDelayBuffer* RenderDelayBuffer::Create2(
+    const EchoCanceller3Config& config,
+    size_t num_bands) {
+  return new RenderDelayBufferImpl2(config, num_bands);  // Caller owns it.
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/aec3/render_delay_buffer_unittest.cc b/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
index ee89597..d1530c6 100644
--- a/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
@@ -38,7 +38,7 @@
   for (auto rate : {8000, 16000, 32000, 48000}) {
     SCOPED_TRACE(ProduceDebugText(rate));
     std::unique_ptr<RenderDelayBuffer> delay_buffer(
-        RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+        RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
     std::vector<std::vector<float>> block_to_insert(
         NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f));
     for (size_t k = 0; k < 10; ++k) {
@@ -62,7 +62,7 @@
 TEST(RenderDelayBuffer, AvailableBlock) {
   constexpr size_t kNumBands = 1;
   std::unique_ptr<RenderDelayBuffer> delay_buffer(
-      RenderDelayBuffer::Create(EchoCanceller3Config(), kNumBands));
+      RenderDelayBuffer::Create2(EchoCanceller3Config(), kNumBands));
   std::vector<std::vector<float>> input_block(
       kNumBands, std::vector<float>(kBlockSize, 1.f));
   EXPECT_EQ(RenderDelayBuffer::BufferingEvent::kNone,
@@ -74,7 +74,7 @@
 TEST(RenderDelayBuffer, SetDelay) {
   EchoCanceller3Config config;
   std::unique_ptr<RenderDelayBuffer> delay_buffer(
-      RenderDelayBuffer::Create(config, 1));
+      RenderDelayBuffer::Create2(config, 1));
   ASSERT_TRUE(delay_buffer->Delay());
   delay_buffer->Reset();
   size_t initial_internal_delay = config.delay.min_echo_path_delay_blocks +
@@ -93,7 +93,7 @@
 // tests on test bots has been fixed.
 TEST(RenderDelayBuffer, DISABLED_WrongDelay) {
   std::unique_ptr<RenderDelayBuffer> delay_buffer(
-      RenderDelayBuffer::Create(EchoCanceller3Config(), 3));
+      RenderDelayBuffer::Create2(EchoCanceller3Config(), 3));
   EXPECT_DEATH(delay_buffer->SetDelay(21), "");
 }
 
@@ -101,7 +101,7 @@
 TEST(RenderDelayBuffer, WrongNumberOfBands) {
   for (auto rate : {16000, 32000, 48000}) {
     SCOPED_TRACE(ProduceDebugText(rate));
-    std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create(
+    std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create2(
         EchoCanceller3Config(), NumBandsForRate(rate)));
     std::vector<std::vector<float>> block_to_insert(
         NumBandsForRate(rate < 48000 ? rate + 16000 : 16000),
@@ -115,7 +115,7 @@
   for (auto rate : {8000, 16000, 32000, 48000}) {
     SCOPED_TRACE(ProduceDebugText(rate));
     std::unique_ptr<RenderDelayBuffer> delay_buffer(
-        RenderDelayBuffer::Create(EchoCanceller3Config(), 3));
+        RenderDelayBuffer::Create2(EchoCanceller3Config(), 3));
     std::vector<std::vector<float>> block_to_insert(
         NumBandsForRate(rate), std::vector<float>(kBlockSize - 1, 0.f));
     EXPECT_DEATH(delay_buffer->Insert(block_to_insert), "");
diff --git a/modules/audio_processing/aec3/render_delay_controller.h b/modules/audio_processing/aec3/render_delay_controller.h
index 06cb12c..41ba422 100644
--- a/modules/audio_processing/aec3/render_delay_controller.h
+++ b/modules/audio_processing/aec3/render_delay_controller.h
@@ -27,6 +27,8 @@
   static RenderDelayController* Create(const EchoCanceller3Config& config,
                                        int non_causal_offset,
                                        int sample_rate_hz);
+  static RenderDelayController* Create2(const EchoCanceller3Config& config,
+                                        int sample_rate_hz);
   virtual ~RenderDelayController() = default;
 
   // Resets the delay controller. If the delay confidence is reset, the reset
diff --git a/modules/audio_processing/aec3/render_delay_controller2.cc b/modules/audio_processing/aec3/render_delay_controller2.cc
new file mode 100644
index 0000000..e27d5f3
--- /dev/null
+++ b/modules/audio_processing/aec3/render_delay_controller2.cc
@@ -0,0 +1,213 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <stddef.h>
+#include <algorithm>
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/delay_estimate.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
+#include "modules/audio_processing/aec3/render_delay_controller.h"
+#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/atomicops.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/constructormagic.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+
+namespace {
+
+bool UseEarlyDelayDetection() {  // True unless the kill switch is enabled.
+  return !field_trial::IsEnabled("WebRTC-Aec3EarlyDelayDetectionKillSwitch");
+}
+
+class RenderDelayControllerImpl2 final : public RenderDelayController {
+ public:
+  RenderDelayControllerImpl2(const EchoCanceller3Config& config,
+                             int sample_rate_hz);
+  ~RenderDelayControllerImpl2() override;
+  void Reset(bool reset_delay_confidence) override;
+  void LogRenderCall() override;
+  absl::optional<DelayEstimate> GetDelay(
+      const DownsampledRenderBuffer& render_buffer,
+      size_t render_delay_buffer_delay,
+      const absl::optional<int>& echo_remover_delay,
+      rtc::ArrayView<const float> capture) override;
+
+ private:
+  static int instance_count_;  // Source of per-instance dumper indices.
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const bool use_early_delay_detection_;  // NOTE(review): unused here?
+  const int delay_headroom_blocks_;
+  const int hysteresis_limit_1_blocks_;  // Applied to delay increases.
+  const int hysteresis_limit_2_blocks_;  // Applied to delay decreases.
+  absl::optional<DelayEstimate> delay_;  // Buffer delay, in blocks.
+  EchoPathDelayEstimator delay_estimator_;
+  RenderDelayControllerMetrics metrics_;
+  absl::optional<DelayEstimate> delay_samples_;  // Estimate, in samples.
+  size_t capture_call_counter_ = 0;
+  int delay_change_counter_ = 0;
+  DelayEstimate::Quality last_delay_estimate_quality_;
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderDelayControllerImpl2);
+};
+
+DelayEstimate ComputeBufferDelay(
+    const absl::optional<DelayEstimate>& current_delay,
+    int delay_headroom_blocks,
+    int hysteresis_limit_1_blocks,
+    int hysteresis_limit_2_blocks,
+    DelayEstimate estimated_delay) {  // Delay in samples; result in blocks.
+  // The below division is not exact and the truncation is intended.
+  const int echo_path_delay_blocks = estimated_delay.delay >> kBlockSizeLog2;
+
+  // Target buffer delay: the echo path delay minus the headroom, floored at 0.
+  size_t new_delay_blocks =
+      std::max(echo_path_delay_blocks - delay_headroom_blocks, 0);
+
+  // Add hysteresis: keep the current delay for changes within the limits.
+  if (current_delay) {
+    size_t current_delay_blocks = current_delay->delay;
+    if (new_delay_blocks > current_delay_blocks) {
+      if (new_delay_blocks <=
+          current_delay_blocks + hysteresis_limit_1_blocks) {
+        new_delay_blocks = current_delay_blocks;
+      }
+    } else if (new_delay_blocks < current_delay_blocks) {
+      size_t hysteresis_limit = std::max(
+          static_cast<int>(current_delay_blocks) - hysteresis_limit_2_blocks,
+          0);
+      if (new_delay_blocks >= hysteresis_limit) {
+        new_delay_blocks = current_delay_blocks;
+      }
+    }
+  }
+
+  DelayEstimate new_delay = estimated_delay;  // Keeps the other fields.
+  new_delay.delay = new_delay_blocks;
+  return new_delay;
+}
+
+int RenderDelayControllerImpl2::instance_count_ = 0;
+
+RenderDelayControllerImpl2::RenderDelayControllerImpl2(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz)
+    : data_dumper_(  // Each instance gets its own dumper index.
+          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
+      use_early_delay_detection_(UseEarlyDelayDetection()),
+      delay_headroom_blocks_(
+          static_cast<int>(config.delay.delay_headroom_blocks)),
+      hysteresis_limit_1_blocks_(
+          static_cast<int>(config.delay.hysteresis_limit_1_blocks)),
+      hysteresis_limit_2_blocks_(
+          static_cast<int>(config.delay.hysteresis_limit_2_blocks)),
+      delay_estimator_(data_dumper_.get(), config),
+      last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
+  delay_estimator_.LogDelayEstimationProperties(sample_rate_hz, 0);
+}
+
+RenderDelayControllerImpl2::~RenderDelayControllerImpl2() = default;
+
+void RenderDelayControllerImpl2::Reset(bool reset_delay_confidence) {
+  // Forget both the sample-domain estimate and the derived buffer delay.
+  delay_samples_ = absl::nullopt;
+  delay_ = absl::nullopt;
+  delay_change_counter_ = 0;
+  delay_estimator_.Reset(reset_delay_confidence);
+  if (!reset_delay_confidence) return;
+  last_delay_estimate_quality_ = DelayEstimate::Quality::kCoarse;
+}
+
+void RenderDelayControllerImpl2::LogRenderCall() {}  // Intentionally empty.
+
+absl::optional<DelayEstimate> RenderDelayControllerImpl2::GetDelay(
+    const DownsampledRenderBuffer& render_buffer,
+    size_t render_delay_buffer_delay,
+    const absl::optional<int>& echo_remover_delay,
+    rtc::ArrayView<const float> capture) {  // Returns delay_, in blocks.
+  RTC_DCHECK_EQ(kBlockSize, capture.size());
+  ++capture_call_counter_;
+
+  auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture);
+
+  // Overrule the delay estimator delay if the echo remover reports a delay.
+  if (echo_remover_delay) {
+    int total_echo_remover_delay_samples =
+        (render_delay_buffer_delay + *echo_remover_delay) * kBlockSize;
+    delay_samples = DelayEstimate(DelayEstimate::Quality::kRefined,
+                                  total_echo_remover_delay_samples);
+  }
+
+  if (delay_samples) {  // A new estimate is available for this block.
+    if (!delay_samples_ || delay_samples->delay != delay_samples_->delay) {
+      delay_change_counter_ = 0;
+    }
+    if (delay_samples_) {  // Merge into the stored estimate.
+      delay_samples_->blocks_since_last_change =
+          delay_samples_->delay == delay_samples->delay
+              ? delay_samples_->blocks_since_last_change + 1
+              : 0;
+      delay_samples_->blocks_since_last_update = 0;
+      delay_samples_->delay = delay_samples->delay;
+      delay_samples_->quality = delay_samples->quality;
+    } else {
+      delay_samples_ = delay_samples;
+    }
+  } else {  // No new estimate: age the stored one, if any.
+    if (delay_samples_) {
+      ++delay_samples_->blocks_since_last_change;
+      ++delay_samples_->blocks_since_last_update;
+    }
+  }
+
+  if (delay_change_counter_ < 2 * kNumBlocksPerSecond) {  // Saturates.
+    ++delay_change_counter_;
+  }
+
+  if (delay_samples_) {
+    // Compute the buffer delay; hysteresis requires two refined estimates.
+    const bool use_hysteresis =
+        last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined &&
+        delay_samples_->quality == DelayEstimate::Quality::kRefined;
+    delay_ = ComputeBufferDelay(delay_, delay_headroom_blocks_,
+                                use_hysteresis ? hysteresis_limit_1_blocks_ : 0,
+                                use_hysteresis ? hysteresis_limit_2_blocks_ : 0,
+                                *delay_samples_);
+    last_delay_estimate_quality_ = delay_samples_->quality;
+  }
+
+  metrics_.Update(delay_samples_ ? absl::optional<size_t>(delay_samples_->delay)
+                                 : absl::nullopt,
+                  delay_ ? delay_->delay : 0, 0);
+
+  data_dumper_->DumpRaw("aec3_render_delay_controller_delay",
+                        delay_samples ? delay_samples->delay : 0);
+  data_dumper_->DumpRaw("aec3_render_delay_controller_buffer_delay",
+                        delay_ ? delay_->delay : 0);
+
+  return delay_;
+}
+
+}  // namespace
+
+RenderDelayController* RenderDelayController::Create2(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz) {  // Returns a heap object owned by the caller.
+  return new RenderDelayControllerImpl2(config, sample_rate_hz);
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/aec3/render_delay_controller_unittest.cc b/modules/audio_processing/aec3/render_delay_controller_unittest.cc
index 93c6499..e9f02d3 100644
--- a/modules/audio_processing/aec3/render_delay_controller_unittest.cc
+++ b/modules/audio_processing/aec3/render_delay_controller_unittest.cc
@@ -57,10 +57,9 @@
       for (auto rate : {8000, 16000, 32000, 48000}) {
         SCOPED_TRACE(ProduceDebugText(rate));
         std::unique_ptr<RenderDelayBuffer> delay_buffer(
-            RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+            RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
         std::unique_ptr<RenderDelayController> delay_controller(
-            RenderDelayController::Create(
-                config, RenderDelayBuffer::DelayEstimatorOffset(config), rate));
+            RenderDelayController::Create2(config, rate));
         for (size_t k = 0; k < 100; ++k) {
           auto delay = delay_controller->GetDelay(
               delay_buffer->GetDownsampledRenderBuffer(), delay_buffer->Delay(),
@@ -87,11 +86,9 @@
         std::vector<std::vector<float>> render_block(
             NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f));
         std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-            RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+            RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
         std::unique_ptr<RenderDelayController> delay_controller(
-            RenderDelayController::Create(
-                EchoCanceller3Config(),
-                RenderDelayBuffer::DelayEstimatorOffset(config), rate));
+            RenderDelayController::Create2(EchoCanceller3Config(), rate));
         for (size_t k = 0; k < 10; ++k) {
           render_delay_buffer->Insert(render_block);
           render_delay_buffer->PrepareCaptureProcessing();
@@ -128,11 +125,9 @@
           absl::optional<DelayEstimate> delay_blocks;
           SCOPED_TRACE(ProduceDebugText(rate, delay_samples));
           std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-              RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+              RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
           std::unique_ptr<RenderDelayController> delay_controller(
-              RenderDelayController::Create(
-                  config, RenderDelayBuffer::DelayEstimatorOffset(config),
-                  rate));
+              RenderDelayController::Create2(config, rate));
           DelayBuffer<float> signal_delay_buffer(delay_samples);
           for (size_t k = 0; k < (400 + delay_samples / kBlockSize); ++k) {
             RandomizeSampleVector(&random_generator, render_block[0]);
@@ -179,11 +174,9 @@
           absl::optional<DelayEstimate> delay_blocks;
           SCOPED_TRACE(ProduceDebugText(rate, -delay_samples));
           std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-              RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+              RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
           std::unique_ptr<RenderDelayController> delay_controller(
-              RenderDelayController::Create(
-                  EchoCanceller3Config(),
-                  RenderDelayBuffer::DelayEstimatorOffset(config), rate));
+              RenderDelayController::Create2(EchoCanceller3Config(), rate));
           DelayBuffer<float> signal_delay_buffer(-delay_samples);
           for (int k = 0;
                k < (400 - delay_samples / static_cast<int>(kBlockSize)); ++k) {
@@ -223,11 +216,9 @@
           absl::optional<DelayEstimate> delay_blocks;
           SCOPED_TRACE(ProduceDebugText(rate, delay_samples));
           std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-              RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+              RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
           std::unique_ptr<RenderDelayController> delay_controller(
-              RenderDelayController::Create(
-                  config, RenderDelayBuffer::DelayEstimatorOffset(config),
-                  rate));
+              RenderDelayController::Create2(config, rate));
           DelayBuffer<float> signal_delay_buffer(delay_samples);
           for (size_t j = 0; j < (1000 + delay_samples / kBlockSize) /
                                          config.delay.api_call_jitter_blocks +
@@ -280,11 +271,10 @@
       for (auto rate : {8000, 16000, 32000, 48000}) {
         SCOPED_TRACE(ProduceDebugText(rate));
         std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-            RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+            RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
 
         std::unique_ptr<RenderDelayController> delay_controller(
-            RenderDelayController::Create(
-                config, RenderDelayBuffer::DelayEstimatorOffset(config), rate));
+            RenderDelayController::Create2(config, rate));
       }
     }
   }
@@ -300,12 +290,10 @@
   for (auto rate : {8000, 16000, 32000, 48000}) {
     SCOPED_TRACE(ProduceDebugText(rate));
     std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-        RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+        RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
     EXPECT_DEATH(
         std::unique_ptr<RenderDelayController>(
-            RenderDelayController::Create(
-                EchoCanceller3Config(),
-                RenderDelayBuffer::DelayEstimatorOffset(config), rate))
+            RenderDelayController::Create2(EchoCanceller3Config(), rate))
             ->GetDelay(render_delay_buffer->GetDownsampledRenderBuffer(),
                        render_delay_buffer->Delay(), echo_remover_delay, block),
         "");
@@ -320,11 +308,10 @@
     SCOPED_TRACE(ProduceDebugText(rate));
     EchoCanceller3Config config;
     std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-        RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
+        RenderDelayBuffer::Create2(config, NumBandsForRate(rate)));
     EXPECT_DEATH(
-        std::unique_ptr<RenderDelayController>(RenderDelayController::Create(
-            EchoCanceller3Config(),
-            RenderDelayBuffer::DelayEstimatorOffset(config), rate)),
+        std::unique_ptr<RenderDelayController>(
+            RenderDelayController::Create2(EchoCanceller3Config(), rate)),
         "");
   }
 }
diff --git a/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc b/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
index f9b1955..a993f8f 100644
--- a/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
+++ b/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
@@ -59,7 +59,7 @@
   std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
   std::array<float, kBlockSize> x_old;
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(EchoCanceller3Config(), 3));
+      RenderDelayBuffer::Create2(EchoCanceller3Config(), 3));
   std::array<float, kFftLengthBy2Plus1> mask;
   x_old.fill(0.f);
 
@@ -93,7 +93,7 @@
   EchoCanceller3Config config;
   config.delay.min_echo_path_delay_blocks = 0;
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
 
   std::array<float, kFftLengthBy2Plus1> mask;
   x_old.fill(0.f);
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index 832d8ca..2e73a7e 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -27,7 +27,7 @@
   EchoCanceller3Config config;
   AecState aec_state(config);
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   std::vector<std::array<float, kFftLengthBy2Plus1>> H2;
   std::array<float, kFftLengthBy2Plus1> S2_linear;
   std::array<float, kFftLengthBy2Plus1> Y2;
@@ -48,7 +48,7 @@
   ResidualEchoEstimator estimator(config);
   AecState aec_state(config);
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
 
   std::array<float, kFftLengthBy2Plus1> E2_main;
   std::array<float, kFftLengthBy2Plus1> E2_shadow;
diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
index c040bbf..017c679 100644
--- a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
@@ -53,7 +53,7 @@
   config.delay.min_echo_path_delay_blocks = 0;
   config.delay.default_delay = 1;
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
 
   std::array<float, kBlockSize> x_old;
   x_old.fill(0.f);
diff --git a/modules/audio_processing/aec3/subtractor_unittest.cc b/modules/audio_processing/aec3/subtractor_unittest.cc
index 9427562..8d14cc1 100644
--- a/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -44,7 +44,7 @@
   config.delay.min_echo_path_delay_blocks = 0;
   config.delay.default_delay = 1;
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   RenderSignalAnalyzer render_signal_analyzer(config);
   Random random_generator(42U);
   Aec3Fft fft;
@@ -127,7 +127,7 @@
   EchoCanceller3Config config;
   Subtractor subtractor(config, &data_dumper, DetectOptimization());
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   RenderSignalAnalyzer render_signal_analyzer(config);
   std::vector<float> y(kBlockSize, 0.f);
 
@@ -143,7 +143,7 @@
   EchoCanceller3Config config;
   Subtractor subtractor(config, &data_dumper, DetectOptimization());
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   RenderSignalAnalyzer render_signal_analyzer(config);
   std::vector<float> y(kBlockSize - 1, 0.f);
   SubtractorOutput output;
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index 1ff96ca..651fd36 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -77,7 +77,7 @@
   ApmDataDumper data_dumper(42);
   Subtractor subtractor(config, &data_dumper, DetectOptimization());
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
-      RenderDelayBuffer::Create(config, 3));
+      RenderDelayBuffer::Create2(config, 3));
   absl::optional<DelayEstimate> delay_estimate;
 
   // Ensure that a strong noise is detected to mask any echoes.