Made the method PartitionDelay independent of the AEC state.

This CL is a step towards simplifying the AEC code, making it more
modifiable and modular.

The changes should be bitexact.

BUG=webrtc:5201, webrtc:5298

Review-Url: https://codereview.webrtc.org/1936203002
Cr-Commit-Position: refs/heads/master@{#12654}
diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc
index 02250e8..a3be9e4 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core.cc
@@ -329,7 +329,9 @@
   }
 }
 
-static int PartitionDelay(const AecCore* aec) {
+static int PartitionDelay(int num_partitions,
+                          float h_fft_buf[2]
+                                         [kExtendedNumPartitions * PART_LEN1]) {
   // Measures the energy in each filter partition and returns the partition with
   // highest energy.
   // TODO(bjornv): Spread computational cost by computing one partition per
@@ -338,13 +340,13 @@
   int i;
   int delay = 0;
 
-  for (i = 0; i < aec->num_partitions; i++) {
+  for (i = 0; i < num_partitions; i++) {
     int j;
     int pos = i * PART_LEN1;
     float wfEn = 0;
     for (j = 0; j < PART_LEN1; j++) {
-      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
-              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+      wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] +
+              h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j];
     }
 
     if (wfEn > wfEnMax) {
@@ -1053,7 +1055,7 @@
   aec->delayEstCtr++;
   if (aec->delayEstCtr == delayEstInterval) {
     aec->delayEstCtr = 0;
-    aec->delayIdx = WebRtcAec_PartitionDelay(aec);
+    aec->delayIdx = WebRtcAec_PartitionDelay(aec->num_partitions, aec->wfBuf);
   }
 
   // Use delayed far.
diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h
index f5a89e1..b5c9d58 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h
@@ -234,7 +234,9 @@
                                           int* extreme_filter_divergence);
 extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
 
-typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec);
+typedef int (*WebRtcAecPartitionDelay)(
+    int num_partitions,
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]);
 extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
 
 typedef void (*WebRtcAecStoreAsComplex)(const float* data,
diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.cc b/webrtc/modules/audio_processing/aec/aec_core_neon.cc
index 01e6ce7..06743b5 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_neon.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_neon.cc
@@ -448,7 +448,9 @@
   }
 }
 
-static int PartitionDelayNEON(const AecCore* aec) {
+static int PartitionDelayNEON(
+    int num_partitions,
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
   // Measures the energy in each filter partition and returns the partition with
   // highest energy.
   // TODO(bjornv): Spread computational cost by computing one partition per
@@ -457,15 +459,15 @@
   int i;
   int delay = 0;
 
-  for (i = 0; i < aec->num_partitions; i++) {
+  for (i = 0; i < num_partitions; i++) {
     int j;
     int pos = i * PART_LEN1;
     float wfEn = 0;
     float32x4_t vec_wfEn = vdupq_n_f32(0.0f);
     // vectorized code (four at once)
     for (j = 0; j + 3 < PART_LEN1; j += 4) {
-      const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
-      const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
+      const float32x4_t vec_wfBuf0 = vld1q_f32(&h_fft_buf[0][pos + j]);
+      const float32x4_t vec_wfBuf1 = vld1q_f32(&h_fft_buf[1][pos + j]);
       vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
       vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
     }
@@ -481,8 +483,8 @@
 
     // scalar code for the remaining items.
     for (; j < PART_LEN1; j++) {
-      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
-              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+      wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] +
+              h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j];
     }
 
     if (wfEn > wfEnMax) {
diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
index 91d98b9..ec466f6 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
@@ -449,7 +449,9 @@
   _mm_store_ss(dst, sum);
 }
 
-static int PartitionDelaySSE2(const AecCore* aec) {
+static int PartitionDelaySSE2(
+    int num_partitions,
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
   // Measures the energy in each filter partition and returns the partition with
   // highest energy.
   // TODO(bjornv): Spread computational cost by computing one partition per
@@ -458,15 +460,15 @@
   int i;
   int delay = 0;
 
-  for (i = 0; i < aec->num_partitions; i++) {
+  for (i = 0; i < num_partitions; i++) {
     int j;
     int pos = i * PART_LEN1;
     float wfEn = 0;
     __m128 vec_wfEn = _mm_set1_ps(0.0f);
     // vectorized code (four at once)
     for (j = 0; j + 3 < PART_LEN1; j += 4) {
-      const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
-      const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+      const __m128 vec_wfBuf0 = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
+      const __m128 vec_wfBuf1 = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
       vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
       vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
     }
@@ -474,8 +476,8 @@
 
     // scalar code for the remaining items.
     for (; j < PART_LEN1; j++) {
-      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
-              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+      wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] +
+              h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j];
     }
 
     if (wfEn > wfEnMax) {