Optimize block_delay_buffer.

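Reduce pointer following in the inner loop by caching the buffer pointers and class member variables in local variables. This allows the compiler to optimize the loop more efficiently when compiling with the "-fno-strict-aliasing" flag.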

Bug: None
Change-Id: Ic126bd2d53969a7e9d15e1c1081d5278e27a816c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/238664
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Commit-Queue: Christian Schuldt <cschuldt@google.com>
Cr-Commit-Position: refs/heads/main@{#35414}
diff --git a/modules/audio_processing/aec3/block_delay_buffer.cc b/modules/audio_processing/aec3/block_delay_buffer.cc
index b9eb3c9..059bbaf 100644
--- a/modules/audio_processing/aec3/block_delay_buffer.cc
+++ b/modules/audio_processing/aec3/block_delay_buffer.cc
@@ -41,17 +41,24 @@
     RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands());
     RTC_DCHECK_EQ(buf_[ch].size(), num_bands);
     rtc::ArrayView<float* const> frame_ch(frame->split_bands(ch), num_bands);
+    const size_t delay = delay_;
 
     for (size_t band = 0; band < num_bands; ++band) {
       RTC_DCHECK_EQ(delay_, buf_[ch][band].size());
       i = i_start;
 
-      for (size_t k = 0; k < frame_length_; ++k) {
-        const float tmp = buf_[ch][band][i];
-        buf_[ch][band][i] = frame_ch[band][k];
-        frame_ch[band][k] = tmp;
+      // Offloading these pointers and class variables to local variables allows
+      // the compiler to optimize the below loop when compiling with
+      // '-fno-strict-aliasing'.
+      float* buf_ch_band = buf_[ch][band].data();
+      float* frame_ch_band = frame_ch[band];
 
-        i = i < delay_ - 1 ? i + 1 : 0;
+      for (size_t k = 0, frame_length = frame_length_; k < frame_length; ++k) {
+        const float tmp = buf_ch_band[i];
+        buf_ch_band[i] = frame_ch_band[k];
+        frame_ch_band[k] = tmp;
+
+        i = i < delay - 1 ? i + 1 : 0;
       }
     }
   }