AEC3: Enable usage of external delay estimator

This change makes it possible to disable AEC3's render delay
controller and delay estimator, and instead rely on an external
delay estimator. The delay is communicated via SetAudioBufferDelay.

When the feature is enabled, no echo removal will be performed
until the first delay is provided.

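The delay is given in milliseconds and is used to align the
render buffer, compensated for the difference between the number
of render and capture calls.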

Bug: b/130016532
Change-Id: I16643109d78d770ff1d2713cf247b0b9cce1bc1c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/131327
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27467}
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index e304f15..c19226e 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -46,6 +46,7 @@
       int initial;
       int converged;
     } delay_selection_thresholds = {5, 20};
+    bool use_external_delay_estimator = false;
   } delay;
 
   struct Filter {
diff --git a/modules/audio_processing/aec3/block_processor.cc b/modules/audio_processing/aec3/block_processor.cc
index 2674827..0997b1a 100644
--- a/modules/audio_processing/aec3/block_processor.cc
+++ b/modules/audio_processing/aec3/block_processor.cc
@@ -114,7 +114,8 @@
     if (!capture_properly_started_) {
       capture_properly_started_ = true;
       render_buffer_->Reset();
-      delay_controller_->Reset(true);
+      if (delay_controller_)
+        delay_controller_->Reset(true);
     }
   } else {
     // If no render data has yet arrived, do not process the capture signal.
@@ -129,7 +130,8 @@
       render_properly_started_) {
     echo_path_variability.delay_change =
         EchoPathVariability::DelayAdjustment::kBufferFlush;
-    delay_controller_->Reset(true);
+    if (delay_controller_)
+      delay_controller_->Reset(true);
     RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block  "
                         << capture_call_counter_;
   }
@@ -142,35 +144,46 @@
       render_buffer_->PrepareCaptureProcessing();
   // Reset the delay controller at render buffer underrun.
   if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) {
-    delay_controller_->Reset(false);
+    if (delay_controller_)
+      delay_controller_->Reset(false);
   }
 
   data_dumper_->DumpWav("aec3_processblock_capture_input2", kBlockSize,
                         &(*capture_block)[0][0],
                         LowestBandRate(sample_rate_hz_), 1);
 
-  // Compute and apply the render delay required to achieve proper signal
-  // alignment.
-  estimated_delay_ =
-      delay_controller_->GetDelay(render_buffer_->GetDownsampledRenderBuffer(),
-                                  render_buffer_->Delay(), (*capture_block)[0]);
+  bool has_delay_estimator = !config_.delay.use_external_delay_estimator;
+  if (has_delay_estimator) {
+    RTC_DCHECK(delay_controller_);
+    // Compute and apply the render delay required to achieve proper signal
+    // alignment.
+    estimated_delay_ = delay_controller_->GetDelay(
+        render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
+        (*capture_block)[0]);
 
-  if (estimated_delay_) {
-    bool delay_change = render_buffer_->SetDelay(estimated_delay_->delay);
-    if (delay_change) {
-      RTC_LOG(LS_WARNING) << "Delay changed to " << estimated_delay_->delay
-                          << " at block " << capture_call_counter_;
-      echo_path_variability.delay_change =
-          EchoPathVariability::DelayAdjustment::kNewDetectedDelay;
+    if (estimated_delay_) {
+      bool delay_change =
+          render_buffer_->AlignFromDelay(estimated_delay_->delay);
+      if (delay_change) {
+        RTC_LOG(LS_WARNING) << "Delay changed to " << estimated_delay_->delay
+                            << " at block " << capture_call_counter_;
+        echo_path_variability.delay_change =
+            EchoPathVariability::DelayAdjustment::kNewDetectedDelay;
+      }
     }
+
+    echo_path_variability.clock_drift = delay_controller_->HasClockdrift();
+
+  } else {
+    render_buffer_->AlignFromExternalDelay();
   }
 
-  echo_path_variability.clock_drift = delay_controller_->HasClockdrift();
-
   // Remove the echo from the capture signal.
-  echo_remover_->ProcessCapture(
-      echo_path_variability, capture_signal_saturation, estimated_delay_,
-      render_buffer_->GetRenderBuffer(), capture_block);
+  if (has_delay_estimator || render_buffer_->HasReceivedBufferDelay()) {
+    echo_remover_->ProcessCapture(
+        echo_path_variability, capture_signal_saturation, estimated_delay_,
+        render_buffer_->GetRenderBuffer(), capture_block);
+  }
 
   // Update the metrics.
   metrics_.UpdateCapture(false);
@@ -193,7 +206,8 @@
                         RenderDelayBuffer::BufferingEvent::kNone);
 
   render_properly_started_ = true;
-  delay_controller_->LogRenderCall();
+  if (delay_controller_)
+    delay_controller_->LogRenderCall();
 }
 
 void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) {
@@ -217,8 +231,11 @@
                                        int sample_rate_hz) {
   std::unique_ptr<RenderDelayBuffer> render_buffer(
       RenderDelayBuffer::Create(config, NumBandsForRate(sample_rate_hz)));
-  std::unique_ptr<RenderDelayController> delay_controller(
-      RenderDelayController::Create(config, sample_rate_hz));
+  std::unique_ptr<RenderDelayController> delay_controller;
+  if (!config.delay.use_external_delay_estimator) {
+    delay_controller.reset(
+        RenderDelayController::Create(config, sample_rate_hz));
+  }
   std::unique_ptr<EchoRemover> echo_remover(
       EchoRemover::Create(config, sample_rate_hz));
   return Create(config, sample_rate_hz, std::move(render_buffer),
@@ -229,8 +246,11 @@
     const EchoCanceller3Config& config,
     int sample_rate_hz,
     std::unique_ptr<RenderDelayBuffer> render_buffer) {
-  std::unique_ptr<RenderDelayController> delay_controller(
-      RenderDelayController::Create(config, sample_rate_hz));
+  std::unique_ptr<RenderDelayController> delay_controller;
+  if (!config.delay.use_external_delay_estimator) {
+    delay_controller.reset(
+        RenderDelayController::Create(config, sample_rate_hz));
+  }
   std::unique_ptr<EchoRemover> echo_remover(
       EchoRemover::Create(config, sample_rate_hz));
   return Create(config, sample_rate_hz, std::move(render_buffer),
diff --git a/modules/audio_processing/aec3/block_processor_unittest.cc b/modules/audio_processing/aec3/block_processor_unittest.cc
index 439419f..29a25eb 100644
--- a/modules/audio_processing/aec3/block_processor_unittest.cc
+++ b/modules/audio_processing/aec3/block_processor_unittest.cc
@@ -118,7 +118,7 @@
     EXPECT_CALL(*render_delay_buffer_mock, Insert(_))
         .Times(kNumBlocks)
         .WillRepeatedly(Return(RenderDelayBuffer::BufferingEvent::kNone));
-    EXPECT_CALL(*render_delay_buffer_mock, SetDelay(kDelayInBlocks))
+    EXPECT_CALL(*render_delay_buffer_mock, AlignFromDelay(kDelayInBlocks))
         .Times(AtLeast(1));
     EXPECT_CALL(*render_delay_buffer_mock, MaxDelay()).WillOnce(Return(30));
     EXPECT_CALL(*render_delay_buffer_mock, Delay())
@@ -162,7 +162,7 @@
         .WillRepeatedly(Return(RenderDelayBuffer::BufferingEvent::kNone));
     EXPECT_CALL(*render_delay_buffer_mock, PrepareCaptureProcessing())
         .Times(kNumBlocks);
-    EXPECT_CALL(*render_delay_buffer_mock, SetDelay(9)).Times(AtLeast(1));
+    EXPECT_CALL(*render_delay_buffer_mock, AlignFromDelay(9)).Times(AtLeast(1));
     EXPECT_CALL(*render_delay_buffer_mock, Delay())
         .Times(kNumBlocks)
         .WillRepeatedly(Return(0));
diff --git a/modules/audio_processing/aec3/echo_remover_unittest.cc b/modules/audio_processing/aec3/echo_remover_unittest.cc
index 82d149a..92cc890 100644
--- a/modules/audio_processing/aec3/echo_remover_unittest.cc
+++ b/modules/audio_processing/aec3/echo_remover_unittest.cc
@@ -161,7 +161,7 @@
       std::unique_ptr<EchoRemover> remover(EchoRemover::Create(config, rate));
       std::unique_ptr<RenderDelayBuffer> render_buffer(
           RenderDelayBuffer::Create(config, NumBandsForRate(rate)));
-      render_buffer->SetDelay(delay_samples / kBlockSize);
+      render_buffer->AlignFromDelay(delay_samples / kBlockSize);
 
       std::vector<std::unique_ptr<DelayBuffer<float>>> delay_buffers(x.size());
       for (size_t j = 0; j < x.size(); ++j) {
diff --git a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h
index ba5e98c..0dd1b91 100644
--- a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h
+++ b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h
@@ -32,7 +32,8 @@
                RenderDelayBuffer::BufferingEvent(
                    const std::vector<std::vector<float>>& block));
   MOCK_METHOD0(PrepareCaptureProcessing, RenderDelayBuffer::BufferingEvent());
-  MOCK_METHOD1(SetDelay, bool(size_t delay));
+  MOCK_METHOD1(AlignFromDelay, bool(size_t delay));
+  MOCK_METHOD0(AlignFromExternalDelay, void());
   MOCK_CONST_METHOD0(Delay, size_t());
   MOCK_CONST_METHOD0(MaxDelay, size_t());
   MOCK_METHOD0(GetRenderBuffer, RenderBuffer*());
@@ -40,6 +41,7 @@
                      const DownsampledRenderBuffer&());
   MOCK_CONST_METHOD1(CausalDelay, bool(size_t delay));
   MOCK_METHOD1(SetAudioBufferDelay, void(size_t delay_ms));
+  MOCK_METHOD0(HasReceivedBufferDelay, bool());
 
  private:
   RenderBuffer* FakeGetRenderBuffer() { return &render_buffer_; }
diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc
index fea2d6e..0b2e979 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -44,7 +44,8 @@
   void Reset() override;
   BufferingEvent Insert(const std::vector<std::vector<float>>& block) override;
   BufferingEvent PrepareCaptureProcessing() override;
-  bool SetDelay(size_t delay) override;
+  bool AlignFromDelay(size_t delay) override;
+  void AlignFromExternalDelay() override;
   size_t Delay() const override { return ComputeDelay(); }
   size_t MaxDelay() const override {
     return blocks_.buffer.size() - 1 - buffer_headroom_;
@@ -57,6 +58,7 @@
 
   int BufferLatency() const;
   void SetAudioBufferDelay(size_t delay_ms) override;
+  bool HasReceivedBufferDelay() override;
 
  private:
   static int instance_count_;
@@ -78,8 +80,8 @@
   bool last_call_was_render_ = false;
   int num_api_calls_in_a_row_ = 0;
   int max_observed_jitter_ = 1;
-  size_t capture_call_counter_ = 0;
-  size_t render_call_counter_ = 0;
+  int64_t capture_call_counter_ = 0;
+  int64_t render_call_counter_ = 0;
   bool render_activity_ = false;
   size_t render_activity_counter_ = 0;
   absl::optional<size_t> external_audio_buffer_delay_;
@@ -172,7 +174,7 @@
     // initial delay. Set the render buffer delays to the default delay.
     ApplyTotalDelay(config_.delay.default_delay);
 
-    // Unset the delays which are set by SetDelay.
+    // Unset the delays which are set by AlignFromDelay.
     delay_ = absl::nullopt;
   }
 }
@@ -277,7 +279,8 @@
 }
 
 // Sets the delay and returns a bool indicating whether the delay was changed.
-bool RenderDelayBufferImpl::SetDelay(size_t delay) {
+bool RenderDelayBufferImpl::AlignFromDelay(size_t delay) {
+  RTC_DCHECK(!config_.delay.use_external_delay_estimator);
   if (!external_audio_buffer_delay_verified_after_reset_ &&
       external_audio_buffer_delay_ && delay_) {
     int difference = static_cast<int>(delay) - static_cast<int>(*delay_);
@@ -312,6 +315,10 @@
   external_audio_buffer_delay_ = delay_ms >> ((num_bands_ == 1) ? 1 : 2);
 }
 
+bool RenderDelayBufferImpl::HasReceivedBufferDelay() {
+  return external_audio_buffer_delay_.has_value();
+}
+
 // Maps the externally computed delay to the delay used internally.
 int RenderDelayBufferImpl::MapDelayToTotalDelay(
     size_t external_delay_blocks) const {
@@ -337,6 +344,15 @@
   ffts_.read = ffts_.OffsetIndex(ffts_.write, delay);
 }
 
+void RenderDelayBufferImpl::AlignFromExternalDelay() {
+  RTC_DCHECK(config_.delay.use_external_delay_estimator);
+  if (external_audio_buffer_delay_) {
+    int64_t delay = render_call_counter_ - capture_call_counter_ +
+                    *external_audio_buffer_delay_;
+    ApplyTotalDelay(delay);
+  }
+}
+
 // Inserts a block into the render buffers.
 void RenderDelayBufferImpl::InsertBlock(
     const std::vector<std::vector<float>>& block,
diff --git a/modules/audio_processing/aec3/render_delay_buffer.h b/modules/audio_processing/aec3/render_delay_buffer.h
index 6926c67..89b3a2a 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.h
+++ b/modules/audio_processing/aec3/render_delay_buffer.h
@@ -48,7 +48,10 @@
 
   // Sets the buffer delay and returns a bool indicating whether the delay
   // changed.
-  virtual bool SetDelay(size_t delay) = 0;
+  virtual bool AlignFromDelay(size_t delay) = 0;
+
+  // Sets the buffer delay from the most recently reported external delay.
+  virtual void AlignFromExternalDelay() = 0;
 
   // Gets the buffer delay.
   virtual size_t Delay() const = 0;
@@ -67,6 +70,10 @@
 
   // Provides an optional external estimate of the audio buffer delay.
   virtual void SetAudioBufferDelay(size_t delay_ms) = 0;
+
+  // Returns whether an external delay estimate has been reported via
+  // SetAudioBufferDelay.
+  virtual bool HasReceivedBufferDelay() = 0;
 };
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/aec3/render_delay_buffer_unittest.cc b/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
index 641a081..21b7ffc 100644
--- a/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer_unittest.cc
@@ -70,8 +70,8 @@
   delay_buffer->PrepareCaptureProcessing();
 }
 
-// Verifies the SetDelay method.
-TEST(RenderDelayBuffer, SetDelay) {
+// Verifies the AlignFromDelay method.
+TEST(RenderDelayBuffer, AlignFromDelay) {
   EchoCanceller3Config config;
   std::unique_ptr<RenderDelayBuffer> delay_buffer(
       RenderDelayBuffer::Create(config, 1));
@@ -80,7 +80,7 @@
   size_t initial_internal_delay = 0;
   for (size_t delay = initial_internal_delay;
        delay < initial_internal_delay + 20; ++delay) {
-    ASSERT_TRUE(delay_buffer->SetDelay(delay));
+    ASSERT_TRUE(delay_buffer->AlignFromDelay(delay));
     EXPECT_EQ(delay, delay_buffer->Delay());
   }
 }
@@ -93,7 +93,7 @@
 TEST(RenderDelayBuffer, DISABLED_WrongDelay) {
   std::unique_ptr<RenderDelayBuffer> delay_buffer(
       RenderDelayBuffer::Create(EchoCanceller3Config(), 3));
-  EXPECT_DEATH(delay_buffer->SetDelay(21), "");
+  EXPECT_DEATH(delay_buffer->AlignFromDelay(21), "");
 }
 
 // Verifies the check for the number of bands in the inserted blocks.
diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
index 4e62c94..fa0be5c 100644
--- a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc
@@ -65,7 +65,7 @@
     : render_delay_buffer_(RenderDelayBuffer::Create(cfg, 1)),
       H2_(cfg.filter.main.length_blocks),
       x_(1, std::vector<float>(kBlockSize, 0.f)) {
-  render_delay_buffer_->SetDelay(4);
+  render_delay_buffer_->AlignFromDelay(4);
   render_buffer_ = render_delay_buffer_->GetRenderBuffer();
   for (auto& H : H2_) {
     H.fill(0.f);