This CL refactors the buffering of the incoming near-end signal inside
the AEC. This solves the following issues:
-Even though the buffering was previously done using ringbuffers, those
were used inefficiently, which caused a lot of hidden memcpy calls.
-The ringbuffers wasted a lot of space in the AEC state as they were too
long.
-The lowest and two upper bands were decoupled in the buffering, which
required extra code to handle.
-On top of the ringbuffers there was a second linear buffer that was
stored in the state, which caused even more data to be stored in the
state.
-The incoming nearend frames were passed to the functions in the form
of buffers on the state, which made the code harder to read as it was
not immediately clear where the nearend signal was used, and when it
was modified.
The CL addresses this by replacing all of the above nearend buffers with
two linear buffers:
-One buffer before the AEC processing for producing nearend
blocks of size 64 that can be processed by the AEC.
-One inside the AEC processing that buffers the current
nearend block until the next block is processed.
The changes have been tested to be bitexact.
This CL will be followed by several other CLs that refactor the other
buffers in the AEC.
BUG=webrtc:5298, webrtc:6018
Review-Url: https://codereview.webrtc.org/2311833002
Cr-Commit-Position: refs/heads/master@{#14141}
diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc
index 08079b8..7cadcbb 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core.cc
@@ -1105,6 +1105,7 @@
}
static void EchoSuppression(AecCore* aec,
+ float* nearend_extended_block_lowest_band,
float farend[PART_LEN2],
float* echo_subtractor_output,
float* output,
@@ -1133,7 +1134,7 @@
// Analysis filter banks for the echo suppressor.
// Windowed near-end ffts.
- WindowData(fft, aec->dBuf);
+ WindowData(fft, nearend_extended_block_lowest_band);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, dfw);
@@ -1214,7 +1215,7 @@
// compute gain factor
for (j = 0; j < aec->num_bands - 1; ++j) {
for (i = 0; i < PART_LEN; i++) {
- outputH[j][i] = aec->dBufH[j][i] * nlpGainHband;
+ outputH[j][i] = aec->previous_nearend_block[j + 1][i] * nlpGainHband;
}
}
@@ -1233,22 +1234,19 @@
}
// Copy the current block to the old position.
- memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
- // Copy the current block to the old position for H band
- for (j = 0; j < aec->num_bands - 1; ++j) {
- memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);
- }
-
memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf,
sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
}
-static void ProcessBlock(AecCore* aec) {
+static void ProcessBlock(AecCore* aec,
+ float nearend_block[NUM_HIGH_BANDS_MAX + 1]
+ [PART_LEN]) {
size_t i;
float fft[PART_LEN2];
+ float nearend_extended_block_lowest_band[PART_LEN2];
float x_fft[2][PART_LEN1];
float df[2][PART_LEN1];
float far_spectrum = 0.0f;
@@ -1264,8 +1262,6 @@
const float ramp = 1.0002f;
const float gInitNoise[2] = {0.999f, 0.001f};
- float nearend[PART_LEN];
- float* nearend_ptr = NULL;
float farend[PART_LEN2];
float* farend_ptr = NULL;
float echo_subtractor_output[PART_LEN];
@@ -1278,17 +1274,6 @@
outputH_ptr[i] = outputH[i];
}
- // Concatenate old and new nearend blocks.
- for (i = 0; i < aec->num_bands - 1; ++i) {
- WebRtc_ReadBuffer(aec->nearFrBufH[i],
- reinterpret_cast<void**>(&nearend_ptr),
- nearend, PART_LEN);
- memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend));
- }
- WebRtc_ReadBuffer(aec->nearFrBuf, reinterpret_cast<void**>(&nearend_ptr),
- nearend, PART_LEN);
- memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
-
// We should always have at least one element stored in |far_buf|.
assert(WebRtc_available_read(aec->far_time_buf) > 0);
WebRtc_ReadBuffer(aec->far_time_buf, reinterpret_cast<void**>(&farend_ptr),
@@ -1296,14 +1281,15 @@
aec->data_dumper->DumpWav("aec_far", PART_LEN, &farend_ptr[PART_LEN],
std::min(aec->sampFreq, 16000), 1);
- aec->data_dumper->DumpWav("aec_near", PART_LEN, nearend_ptr,
+ aec->data_dumper->DumpWav("aec_near", PART_LEN, &nearend_block[0][0],
std::min(aec->sampFreq, 16000), 1);
if (aec->metricsMode == 1) {
// Update power levels
UpdateLevel(&aec->farlevel,
CalculatePower(&farend_ptr[PART_LEN], PART_LEN));
- UpdateLevel(&aec->nearlevel, CalculatePower(nearend_ptr, PART_LEN));
+ UpdateLevel(&aec->nearlevel,
+ CalculatePower(&nearend_block[0][0], PART_LEN));
}
// Convert far-end signal to the frequency domain.
@@ -1311,8 +1297,14 @@
Fft(fft, x_fft);
x_fft_ptr = &x_fft[0][0];
+ // Form extended nearend frame.
+ memcpy(&nearend_extended_block_lowest_band[0],
+ &aec->previous_nearend_block[0][0], sizeof(float) * PART_LEN);
+ memcpy(&nearend_extended_block_lowest_band[PART_LEN], &nearend_block[0][0],
+ sizeof(float) * PART_LEN);
+
// Near fft
- memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
+ memcpy(fft, nearend_extended_block_lowest_band, sizeof(float) * PART_LEN2);
Fft(fft, df);
// Power smoothing.
@@ -1394,7 +1386,7 @@
EchoSubtraction(aec->num_partitions, aec->extended_filter_enabled,
&aec->extreme_filter_divergence, aec->filter_step_size,
aec->error_threshold, &x_fft[0][0], &aec->xfBufBlockPos,
- aec->xfBuf, nearend_ptr, aec->xPow, aec->wfBuf,
+ aec->xfBuf, &nearend_block[0][0], aec->xPow, aec->wfBuf,
echo_subtractor_output);
aec->data_dumper->DumpRaw("aec_h_fft", PART_LEN1 * aec->num_partitions,
&aec->wfBuf[0][0]);
@@ -1410,13 +1402,20 @@
}
// Perform echo suppression.
- EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr);
+ EchoSuppression(aec, nearend_extended_block_lowest_band, farend_ptr,
+ echo_subtractor_output, output, outputH_ptr);
if (aec->metricsMode == 1) {
UpdateLevel(&aec->nlpoutlevel, CalculatePower(output, PART_LEN));
UpdateMetrics(aec);
}
+ // Store the nearend signal until the next frame.
+ for (i = 0; i < aec->num_bands; ++i) {
+ memcpy(&aec->previous_nearend_block[i][0], &nearend_block[i][0],
+ sizeof(float) * PART_LEN);
+ }
+
// Store the output block.
WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
// For high bands
@@ -1435,12 +1434,8 @@
if (!aec) {
return NULL;
}
-
- aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
- if (!aec->nearFrBuf) {
- WebRtcAec_FreeAec(aec);
- return NULL;
- }
+ aec->nearend_buffer_size = 0;
+ memset(&aec->nearend_buffer[0], 0, sizeof(aec->nearend_buffer));
aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
if (!aec->outFrBuf) {
@@ -1449,12 +1444,6 @@
}
for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
- aec->nearFrBufH[i] =
- WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
- if (!aec->nearFrBufH[i]) {
- WebRtcAec_FreeAec(aec);
- return NULL;
- }
aec->outFrBufH[i] =
WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
if (!aec->outFrBufH[i]) {
@@ -1539,11 +1528,9 @@
return;
}
- WebRtc_FreeBuffer(aec->nearFrBuf);
WebRtc_FreeBuffer(aec->outFrBuf);
for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
- WebRtc_FreeBuffer(aec->nearFrBufH[i]);
WebRtc_FreeBuffer(aec->outFrBufH[i]);
}
@@ -1606,10 +1593,11 @@
aec->num_bands = (size_t)(sampFreq / 16000);
}
- WebRtc_InitBuffer(aec->nearFrBuf);
+ aec->nearend_buffer_size = 0;
+ memset(&aec->nearend_buffer[0], 0, sizeof(aec->nearend_buffer));
+
WebRtc_InitBuffer(aec->outFrBuf);
for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
- WebRtc_InitBuffer(aec->nearFrBufH[i]);
WebRtc_InitBuffer(aec->outFrBufH[i]);
}
@@ -1672,12 +1660,8 @@
aec->knownDelay = 0;
// Initialize buffers
- memset(aec->dBuf, 0, sizeof(aec->dBuf));
+ memset(aec->previous_nearend_block, 0, sizeof(aec->previous_nearend_block));
memset(aec->eBuf, 0, sizeof(aec->eBuf));
- // For H bands
- for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
- memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i]));
- }
memset(aec->xPow, 0, sizeof(aec->xPow));
memset(aec->dPow, 0, sizeof(aec->dPow));
@@ -1762,9 +1746,10 @@
size_t num_samples,
int knownDelay,
float* const* out) {
- size_t i, j;
int out_elements = 0;
+ RTC_DCHECK(num_samples == 80 || num_samples == 160);
+
aec->frame_count++;
// For each frame the process is as follows:
// 1) If the system_delay indicates on being too small for processing a
@@ -1795,16 +1780,7 @@
assert(aec->num_bands == num_bands);
- for (j = 0; j < num_samples; j += FRAME_LEN) {
- // TODO(bjornv): Change the near-end buffer handling to be the same as for
- // far-end, that is, with a near_pre_buf.
- // Buffer the near-end frame.
- WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN);
- // For H band
- for (i = 1; i < num_bands; ++i) {
- WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN);
- }
-
+ for (size_t j = 0; j < num_samples; j += FRAME_LEN) {
// 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
// have enough far-end data for that by stuffing the buffer if the
// |system_delay| indicates others.
@@ -1836,7 +1812,7 @@
DelaySource::kDelayAgnostic);
int far_near_buffer_diff =
WebRtc_available_read(aec->far_time_buf) -
- WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
+ (aec->nearend_buffer_size + FRAME_LEN) / PART_LEN;
WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
moved_elements);
@@ -1851,9 +1827,35 @@
}
}
- // 4) Process as many blocks as possible.
- while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
- ProcessBlock(aec);
+ // Form and process a block of samples.
+ RTC_DCHECK_EQ(16, FRAME_LEN - PART_LEN);
+ float nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN];
+ const int num_samples_to_block = PART_LEN - aec->nearend_buffer_size;
+ const int num_samples_to_buffer = FRAME_LEN - num_samples_to_block;
+ for (size_t i = 0; i < num_bands; ++i) {
+ memcpy(&nearend_block[i][0], &aec->nearend_buffer[i][0],
+ aec->nearend_buffer_size * sizeof(float));
+ memcpy(&nearend_block[i][aec->nearend_buffer_size], &nearend[i][j],
+ num_samples_to_block * sizeof(float));
+ }
+ ProcessBlock(aec, nearend_block);
+
+ if (num_samples_to_buffer == PART_LEN) {
+ // If possible form and process a second block of samples.
+ for (size_t i = 0; i < num_bands; ++i) {
+ memcpy(&nearend_block[i][0], &nearend[i][j + num_samples_to_block],
+ num_samples_to_buffer * sizeof(float));
+ }
+ ProcessBlock(aec, nearend_block);
+ aec->nearend_buffer_size = 0;
+ } else {
+ // Buffer the remaining samples in the frame.
+ for (size_t i = 0; i < num_bands; ++i) {
+ memcpy(&aec->nearend_buffer[i][0],
+ &nearend[i][j + num_samples_to_block],
+ num_samples_to_buffer * sizeof(float));
+ }
+ aec->nearend_buffer_size = num_samples_to_buffer;
}
// 5) Update system delay with respect to the entire frame.
@@ -1865,14 +1867,14 @@
out_elements = static_cast<int>(WebRtc_available_read(aec->outFrBuf));
if (out_elements < FRAME_LEN) {
WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN);
- for (i = 0; i < num_bands - 1; ++i) {
+ for (size_t i = 0; i < num_bands - 1; ++i) {
WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN);
}
}
// Obtain an output frame.
WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN);
// For H bands.
- for (i = 1; i < num_bands; ++i) {
+ for (size_t i = 1; i < num_bands; ++i) {
WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN);
}
}
diff --git a/webrtc/modules/audio_processing/aec/aec_core.h b/webrtc/modules/audio_processing/aec/aec_core.h
index 1ab2020..8f4bc89 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.h
+++ b/webrtc/modules/audio_processing/aec/aec_core.h
@@ -128,16 +128,20 @@
int inSamples, outSamples;
int delayEstCtr;
- RingBuffer* nearFrBuf;
+ // Nearend buffer used for changing from FRAME_LEN to PART_LEN sample block
+ // sizes. The buffer stores all the incoming bands and for each band a maximum
+ // of PART_LEN - (FRAME_LEN - PART_LEN) values need to be buffered in order to
+ // change the block size from FRAME_LEN to PART_LEN.
+ float nearend_buffer[NUM_HIGH_BANDS_MAX + 1]
+ [PART_LEN - (FRAME_LEN - PART_LEN)];
+ int nearend_buffer_size;
RingBuffer* outFrBuf;
- RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
- float dBuf[PART_LEN2]; // nearend
float eBuf[PART_LEN2]; // error
- float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend
+ float previous_nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN];
float xPow[PART_LEN1];
float dPow[PART_LEN1];
diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.cc b/webrtc/modules/audio_processing/aec/echo_cancellation.cc
index c26b1a0..4f65c2b 100644
--- a/webrtc/modules/audio_processing/aec/echo_cancellation.cc
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation.cc
@@ -683,6 +683,7 @@
int32_t skew) {
size_t i;
const int delay_diff_offset = kDelayDiffOffsetSamples;
+ RTC_DCHECK(num_samples == 80 || num_samples == 160);
#if defined(WEBRTC_UNTRUSTED_DELAY)
reported_delay_ms = kFixedDelayMs;
#else