AEC3: Fix render delay buffer alignment issue at call start

Internal counters in the RenderDelayBuffer can slip out of sync with external counters, leading to buffer misalignment.
This CL gives the RenderDelayBuffer an opportunity to update its counters.

Tested:
Passes: modules_unittests --gtest_filter=BlockProcessor.*
Fails as expected due to new unit test: modules_unittests --gtest_filter=BlockProcessor.* --force_fieldtrials="WebRTC-Aec3RenderBufferCallCounterUpdateKillSwitch/Enabled/"

audioproc_f with default AEC settings has been verified to be bit-exact on a large number of aecdumps.

Bug: webrtc:11803
Change-Id: I9363b834c8c8c934add0335013df60bf131da4bc
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/180126
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31795}
diff --git a/modules/audio_processing/aec3/block_processor.cc b/modules/audio_processing/aec3/block_processor.cc
index 9116c81..f2f3261 100644
--- a/modules/audio_processing/aec3/block_processor.cc
+++ b/modules/audio_processing/aec3/block_processor.cc
@@ -128,6 +128,7 @@
     }
   } else {
     // If no render data has yet arrived, do not process the capture signal.
+    render_buffer_->HandleSkippedCaptureProcessing();
     return;
   }
 
diff --git a/modules/audio_processing/aec3/block_processor_unittest.cc b/modules/audio_processing/aec3/block_processor_unittest.cc
index 911dad4..d87e27a 100644
--- a/modules/audio_processing/aec3/block_processor_unittest.cc
+++ b/modules/audio_processing/aec3/block_processor_unittest.cc
@@ -30,6 +30,7 @@
 
 using ::testing::_;
 using ::testing::AtLeast;
+using ::testing::NiceMock;
 using ::testing::Return;
 using ::testing::StrictMock;
 
@@ -129,6 +130,14 @@
   return ss.Release();
 }
 
+// Sets samples[i] to (call_counter - delay) * 10000 + i for each index i.
+void FillSampleVector(int call_counter,
+                      int delay,
+                      rtc::ArrayView<float> samples) {
+  const float offset = (call_counter - delay) * 10000.0f;
+  for (size_t n = 0; n < samples.size(); ++n) { samples[n] = offset + n; }
+}
+
 }  // namespace
 
 // Verifies that the delay controller functionality is properly integrated with
@@ -301,4 +310,77 @@
 
 #endif
 
+// Verifies that external delay estimator delays are applied correctly when a
+// call begins with a sequence of capture blocks.
+TEST(BlockProcessor, ExternalDelayAppliedCorrectlyWithInitialCaptureCalls) {
+  constexpr int kNumRenderChannels = 1;
+  constexpr int kNumCaptureChannels = 1;
+  constexpr int kSampleRateHz = 16000;
+
+  EchoCanceller3Config config;
+  config.delay.use_external_delay_estimator = true;
+
+  std::unique_ptr<RenderDelayBuffer> delay_buffer(
+      RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
+
+  std::unique_ptr<testing::NiceMock<webrtc::test::MockEchoRemover>>
+      echo_remover_mock(new NiceMock<webrtc::test::MockEchoRemover>());
+  webrtc::test::MockEchoRemover* echo_remover_mock_pointer =
+      echo_remover_mock.get();
+
+  std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create(
+      config, kSampleRateHz, kNumRenderChannels, kNumCaptureChannels,
+      std::move(delay_buffer), /*delay_controller=*/nullptr,
+      std::move(echo_remover_mock)));
+
+  std::vector<std::vector<std::vector<float>>> render_block(
+      NumBandsForRate(kSampleRateHz),
+      std::vector<std::vector<float>>(kNumRenderChannels,
+                                      std::vector<float>(kBlockSize, 0.f)));
+  std::vector<std::vector<std::vector<float>>> capture_block(
+      NumBandsForRate(kSampleRateHz),
+      std::vector<std::vector<float>>(kNumCaptureChannels,
+                                      std::vector<float>(kBlockSize, 0.f)));
+
+  // The test drives the block processor through three phases:
+  // - 10 capture calls, where no render data is available,
+  // - 10 render calls, populating the buffer,
+  // - 2 capture calls, verifying that the delay was applied correctly.
+  constexpr int kDelayInBlocks = 5;
+  constexpr int kDelayInMs = 20;  // Presumably kDelayInBlocks * 4 ms; confirm.
+  block_processor->SetAudioBufferDelay(kDelayInMs);
+
+  int capture_call_counter = 0;
+  int render_call_counter = 0;
+  for (size_t k = 0; k < 10; ++k) {  // Phase 1: capture calls only.
+    FillSampleVector(++capture_call_counter, kDelayInBlocks,
+                     capture_block[0][0]);
+    block_processor->ProcessCapture(false, false, nullptr, &capture_block);
+  }
+  for (size_t k = 0; k < 10; ++k) {  // Phase 2: render calls only.
+    FillSampleVector(++render_call_counter, 0, render_block[0][0]);
+    block_processor->BufferRender(render_block);
+  }
+
+  EXPECT_CALL(*echo_remover_mock_pointer, ProcessCapture)
+      .WillRepeatedly(
+          [](EchoPathVariability /*echo_path_variability*/,
+             bool /*capture_signal_saturation*/,
+             const absl::optional<DelayEstimate>& /*external_delay*/,
+             RenderBuffer* render_buffer,
+             std::vector<std::vector<std::vector<float>>>* /*linear_output*/,
+             std::vector<std::vector<std::vector<float>>>* capture) {
+            const auto& render = render_buffer->Block(0);
+            for (size_t i = 0; i < kBlockSize; ++i) {
+              EXPECT_FLOAT_EQ(render[0][0][i], (*capture)[0][0][i]);
+            }
+          });
+
+  FillSampleVector(++capture_call_counter, kDelayInBlocks, capture_block[0][0]);
+  block_processor->ProcessCapture(false, false, nullptr, &capture_block);
+
+  FillSampleVector(++capture_call_counter, kDelayInBlocks, capture_block[0][0]);
+  block_processor->ProcessCapture(false, false, nullptr, &capture_block);
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h
index 26f58cf..9d7b8f4 100644
--- a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h
+++ b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h
@@ -32,6 +32,7 @@
               Insert,
               (const std::vector<std::vector<std::vector<float>>>& block),
               (override));
+  MOCK_METHOD(void, HandleSkippedCaptureProcessing, (), (override));
   MOCK_METHOD(RenderDelayBuffer::BufferingEvent,
               PrepareCaptureProcessing,
               (),
diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc
index 10e81d8..f5030e1 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -35,10 +35,16 @@
 #include "rtc_base/atomic_ops.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
+#include "system_wrappers/include/field_trial.h"
 
 namespace webrtc {
 namespace {
 
+bool UpdateCaptureCallCounterOnSkippedBlocks() {
+  return !field_trial::IsEnabled(  // Kill switch: trial enabled => false.
+      "WebRTC-Aec3RenderBufferCallCounterUpdateKillSwitch");
+}
+
 class RenderDelayBufferImpl final : public RenderDelayBuffer {
  public:
   RenderDelayBufferImpl(const EchoCanceller3Config& config,
@@ -51,6 +57,7 @@
   BufferingEvent Insert(
       const std::vector<std::vector<std::vector<float>>>& block) override;
   BufferingEvent PrepareCaptureProcessing() override;
+  void HandleSkippedCaptureProcessing() override;
   bool AlignFromDelay(size_t delay) override;
   void AlignFromExternalDelay() override;
   size_t Delay() const override { return ComputeDelay(); }
@@ -72,6 +79,7 @@
   std::unique_ptr<ApmDataDumper> data_dumper_;
   const Aec3Optimization optimization_;
   const EchoCanceller3Config config_;
+  const bool update_capture_call_counter_on_skipped_blocks_;
   const float render_linear_amplitude_gain_;
   const rtc::LoggingSeverity delay_log_level_;
   size_t down_sampling_factor_;
@@ -122,6 +130,8 @@
           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
       optimization_(DetectOptimization()),
       config_(config),
+      update_capture_call_counter_on_skipped_blocks_(
+          UpdateCaptureCallCounterOnSkippedBlocks()),
       render_linear_amplitude_gain_(
           std::pow(10.0f, config_.render_levels.render_power_gain_db / 20.f)),
       delay_log_level_(config_.delay.log_warning_on_delay_changes
@@ -243,6 +253,12 @@
   return event;
 }
 
+// Counts a capture block for which PrepareCaptureProcessing was not called.
+void RenderDelayBufferImpl::HandleSkippedCaptureProcessing() {
+  if (!update_capture_call_counter_on_skipped_blocks_) return;
+  ++capture_call_counter_;
+}
+
 // Prepares the render buffers for processing another capture block.
 RenderDelayBuffer::BufferingEvent
 RenderDelayBufferImpl::PrepareCaptureProcessing() {
diff --git a/modules/audio_processing/aec3/render_delay_buffer.h b/modules/audio_processing/aec3/render_delay_buffer.h
index 0758e9d..79ffc4d 100644
--- a/modules/audio_processing/aec3/render_delay_buffer.h
+++ b/modules/audio_processing/aec3/render_delay_buffer.h
@@ -48,6 +48,9 @@
   // an enum indicating whether there was a special event that occurred.
   virtual BufferingEvent PrepareCaptureProcessing() = 0;
 
+  // Called on capture blocks where PrepareCaptureProcessing is not called.
+  virtual void HandleSkippedCaptureProcessing() = 0;
+
   // Sets the buffer delay and returns a bool indicating whether the delay
   // changed.
   virtual bool AlignFromDelay(size_t delay) = 0;