Add lookahead to the delay estimator.
TEST=audioproc_unittest
Review URL: http://webrtc-codereview.appspot.com/279014
git-svn-id: http://webrtc.googlecode.com/svn/trunk@992 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/src/modules/audio_processing/aec/aec_core.c b/src/modules/audio_processing/aec/aec_core.c
index ac18599..ee85c2e 100644
--- a/src/modules/audio_processing/aec/aec_core.c
+++ b/src/modules/audio_processing/aec/aec_core.c
@@ -178,7 +178,8 @@
if (WebRtc_CreateDelayEstimator(&aec->delay_estimator,
PART_LEN1,
- kMaxDelay) == -1) {
+ kMaxDelayBlocks,
+ kLookaheadBlocks) == -1) {
WebRtcAec_FreeAec(aec);
aec = NULL;
return -1;
@@ -700,7 +701,7 @@
PART_LEN1,
aec->echoState);
if (delay_estimate >= 0) {
- // Update delay estimate buffer
+ // Update delay estimate buffer.
aec->delay_histogram[delay_estimate]++;
}
}
diff --git a/src/modules/audio_processing/aec/aec_core.h b/src/modules/audio_processing/aec/aec_core.h
index 78b5349..f858a05 100644
--- a/src/modules/audio_processing/aec/aec_core.h
+++ b/src/modules/audio_processing/aec/aec_core.h
@@ -30,9 +30,10 @@
#define FAR_BUF_LEN (FILT_LEN2 * 2)
#define PREF_BAND_SIZE 24
-#define BLOCKL_MAX FRAME_LEN
-// Maximum delay in fixed point delay estimator, used for logging
-enum {kMaxDelay = 100};
+// Delay estimator constants, used for logging.
+enum { kMaxDelayBlocks = 60 };
+enum { kLookaheadBlocks = 15 };
+enum { kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks };
typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
@@ -141,7 +142,7 @@
int flag_Hband_cn; //for comfort noise
float cn_scale_Hband; //scale for comfort noise in H band
- int delay_histogram[kMaxDelay];
+ int delay_histogram[kHistorySizeBlocks];
int delay_logging_enabled;
void* delay_estimator;
diff --git a/src/modules/audio_processing/aec/echo_cancellation.c b/src/modules/audio_processing/aec/echo_cancellation.c
index a3bad49..91e968d 100644
--- a/src/modules/audio_processing/aec/echo_cancellation.c
+++ b/src/modules/audio_processing/aec/echo_cancellation.c
@@ -748,7 +748,7 @@
}
// Get number of delay values since last update
- for (i = 0; i < kMaxDelay; i++) {
+ for (i = 0; i < kHistorySizeBlocks; i++) {
num_delay_values += self->aec->delay_histogram[i];
}
if (num_delay_values == 0) {
@@ -760,17 +760,18 @@
delay_values = num_delay_values >> 1; // Start value for median count down
// Get median of delay values since last update
- for (i = 0; i < kMaxDelay; i++) {
+ for (i = 0; i < kHistorySizeBlocks; i++) {
delay_values -= self->aec->delay_histogram[i];
if (delay_values < 0) {
my_median = i;
break;
}
}
- *median = my_median * kMsPerBlock;
+ // Account for lookahead.
+ *median = (my_median - kLookaheadBlocks) * kMsPerBlock;
// Calculate the L1 norm, with median value as central moment
- for (i = 0; i < kMaxDelay; i++) {
+ for (i = 0; i < kHistorySizeBlocks; i++) {
l1_norm += (float) (fabs(i - my_median) * self->aec->delay_histogram[i]);
}
*std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock;
diff --git a/src/modules/audio_processing/aecm/aecm_core.c b/src/modules/audio_processing/aecm/aecm_core.c
index 2c670da..546ad34 100644
--- a/src/modules/audio_processing/aecm/aecm_core.c
+++ b/src/modules/audio_processing/aecm/aecm_core.c
@@ -211,7 +211,8 @@
if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator,
PART_LEN1,
- MAX_DELAY) == -1) {
+ MAX_DELAY,
+ 0) == -1) {
WebRtcAecm_FreeCore(aecm);
aecm = NULL;
return -1;
diff --git a/src/modules/audio_processing/test/unit_test.cc b/src/modules/audio_processing/test/unit_test.cc
index 0d8b5ec..07884ae 100644
--- a/src/modules/audio_processing/test/unit_test.cc
+++ b/src/modules/audio_processing/test/unit_test.cc
@@ -1144,8 +1144,8 @@
webrtc::audioproc::Test::DelayMetrics reference_delay =
test->delay_metrics();
- EXPECT_EQ(median, reference_delay.median());
- EXPECT_EQ(std, reference_delay.std());
+ EXPECT_EQ(reference_delay.median(), median);
+ EXPECT_EQ(reference_delay.std(), std);
EXPECT_EQ(test->rms_level(), rms_level);
#endif
diff --git a/src/modules/audio_processing/utility/delay_estimator.c b/src/modules/audio_processing/utility/delay_estimator.c
index 9ada8b9..d06cccc 100644
--- a/src/modules/audio_processing/utility/delay_estimator.c
+++ b/src/modules/audio_processing/utility/delay_estimator.c
@@ -65,6 +65,10 @@
free(handle->binary_far_history);
handle->binary_far_history = NULL;
}
+ if (handle->binary_near_history != NULL) {
+ free(handle->binary_near_history);
+ handle->binary_near_history = NULL;
+ }
if (handle->delay_histogram != NULL) {
free(handle->delay_histogram);
handle->delay_histogram = NULL;
@@ -76,13 +80,22 @@
}
int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
- int history_size) {
+ int max_delay,
+ int lookahead) {
BinaryDelayEstimator_t* self = NULL;
+ int history_size = max_delay + lookahead;
if (handle == NULL) {
return -1;
}
- if (history_size < 0) {
+ if (max_delay < 0) {
+ return -1;
+ }
+ if (lookahead < 0) {
+ return -1;
+ }
+ if (history_size < 2) {
+ // Must be this large for buffer shifting.
return -1;
}
@@ -97,6 +110,9 @@
self->binary_far_history = NULL;
self->delay_histogram = NULL;
+ self->history_size = history_size;
+ self->near_history_size = lookahead + 1;
+
// Allocate memory for spectrum buffers
self->mean_bit_counts = malloc(history_size * sizeof(int32_t));
if (self->mean_bit_counts == NULL) {
@@ -117,6 +133,13 @@
self = NULL;
return -1;
}
+ self->binary_near_history = malloc(self->near_history_size *
+ sizeof(uint32_t));
+ if (self->binary_near_history == NULL) {
+ WebRtc_FreeBinaryDelayEstimator(self);
+ self = NULL;
+ return -1;
+ }
self->delay_histogram = malloc(history_size * sizeof(int));
if (self->delay_histogram == NULL) {
WebRtc_FreeBinaryDelayEstimator(self);
@@ -124,26 +147,24 @@
return -1;
}
- self->history_size = history_size;
-
return 0;
}
int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle) {
assert(handle != NULL);
- // Set averaged bit counts to zero
+
memset(handle->mean_bit_counts, 0, sizeof(int32_t) * handle->history_size);
memset(handle->bit_counts, 0, sizeof(int32_t) * handle->history_size);
- // Set far end histories to zero
- memset(handle->binary_far_history,
- 0,
+ memset(handle->binary_far_history, 0,
sizeof(uint32_t) * handle->history_size);
- // Set delay histogram to zero
+ memset(handle->binary_near_history, 0,
+ sizeof(uint32_t) * handle->near_history_size);
memset(handle->delay_histogram, 0, sizeof(int) * handle->history_size);
- // Set VAD counter to zero
+
handle->vad_counter = 0;
- // Set delay memory to zero
- handle->last_delay = 0;
+
+ // Set to zero delay after compensating for lookahead.
+ handle->last_delay = handle->near_history_size - 1;
return 0;
}
@@ -169,6 +190,15 @@
// Insert new binary spectrum
handle->binary_far_history[0] = binary_far_spectrum;
+ if (handle->near_history_size > 1) {
+ memmove(&(handle->binary_near_history[1]),
+ &(handle->binary_near_history[0]),
+ (handle->near_history_size - 1) * sizeof(uint32_t));
+ handle->binary_near_history[0] = binary_near_spectrum;
+ binary_near_spectrum =
+ handle->binary_near_history[handle->near_history_size - 1];
+ }
+
// Compare with delayed spectra
BitCountComparison(binary_near_spectrum,
handle->binary_far_history,
@@ -198,7 +228,6 @@
handle->delay_histogram[min_position] += 3;
}
- handle->last_delay = 0;
for (i = 0; i < handle->history_size; i++) {
histogram_bin = handle->delay_histogram[i];
diff --git a/src/modules/audio_processing/utility/delay_estimator.h b/src/modules/audio_processing/utility/delay_estimator.h
index 43700fd..242124e 100644
--- a/src/modules/audio_processing/utility/delay_estimator.h
+++ b/src/modules/audio_processing/utility/delay_estimator.h
@@ -25,18 +25,22 @@
// determined at run-time.
int32_t* bit_counts;
- // Binary history variables
+ // Binary history variables.
uint32_t* binary_far_history;
+ uint32_t* binary_near_history;
- // Delay histogram variables
+ // Delay histogram variables.
int* delay_histogram;
int vad_counter;
- // Delay memory
+ // Delay memory.
int last_delay;
- // Buffer size parameters
+ // Buffer size.
int history_size;
+
+ // Near-end buffer size.
+ int near_history_size;
} BinaryDelayEstimator_t;
// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...)
@@ -45,20 +49,10 @@
//
int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle);
-// Allocates the memory needed by the binary delay estimation. The memory needs
-// to be initialized separately using WebRtc_InitBinaryDelayEstimator(...).
-//
-// Inputs:
-// - handle : Instance that should be created
-// - history_size : Size of the far end history used to estimate the
-// delay from. Used to allocate memory for history
-// specific buffers.
-//
-// Output:
-// - handle : Created instance
-//
+// Refer to WebRtc_CreateDelayEstimator() in delay_estimator_wrapper.h
int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
- int history_size);
+ int max_delay,
+ int lookahead);
// Initializes the delay estimation instance created with
// WebRtc_CreateBinaryDelayEstimator(...)
@@ -70,12 +64,13 @@
//
int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle);
-// Estimates and returns the delay between the binary far end and binary near
-// end spectra.
+// Estimates and returns the delay between the binary far-end and binary near-
+// end spectra. The value will be offset by the lookahead (i.e. the lookahead
+// should be subtracted from the returned value).
// Inputs:
// - handle : Pointer to the delay estimation instance
-// - binary_far_spectrum : Far end binary spectrum
-// - binary_near_spectrum : Near end binary spectrum of the current block
+// - binary_far_spectrum : Far-end binary spectrum
+// - binary_near_spectrum : Near-end binary spectrum of the current block
// - vad_value : The VAD decision of the current block
//
// Output:
@@ -102,14 +97,14 @@
//
int WebRtc_binary_last_delay(BinaryDelayEstimator_t* handle);
-// Returns the history size used in the far end buffers to calculate the delay
+// Returns the history size used in the far-end buffers to calculate the delay
// over.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
-// - history_size : > 0 - Far end history size
+// - history_size : > 0 - Far-end history size
// -1 - Error
//
int WebRtc_history_size(BinaryDelayEstimator_t* handle);
diff --git a/src/modules/audio_processing/utility/delay_estimator_wrapper.c b/src/modules/audio_processing/utility/delay_estimator_wrapper.c
index ee0f011..6e4e44a 100644
--- a/src/modules/audio_processing/utility/delay_estimator_wrapper.c
+++ b/src/modules/audio_processing/utility/delay_estimator_wrapper.c
@@ -133,7 +133,8 @@
int WebRtc_CreateDelayEstimator(void** handle,
int spectrum_size,
- int history_size) {
+ int max_delay,
+ int lookahead) {
DelayEstimator_t *self = NULL;
// Check if the sub band used in the delay estimation is small enough to
@@ -158,7 +159,8 @@
// Create binary delay estimator.
if (WebRtc_CreateBinaryDelayEstimator(&self->binary_handle,
- history_size) != 0) {
+ max_delay,
+ lookahead) != 0) {
WebRtc_FreeDelayEstimator(self);
self = NULL;
return -1;
diff --git a/src/modules/audio_processing/utility/delay_estimator_wrapper.h b/src/modules/audio_processing/utility/delay_estimator_wrapper.h
index 313773a..31e1dde 100644
--- a/src/modules/audio_processing/utility/delay_estimator_wrapper.h
+++ b/src/modules/audio_processing/utility/delay_estimator_wrapper.h
@@ -26,20 +26,29 @@
// initialized separately through WebRtc_InitDelayEstimator(...).
//
// Inputs:
-// - handle : Instance that should be created
-// - spectrum_size : Size of the spectrum used both in far end and
-// near end. Used to allocate memory for spectrum
-// specific buffers.
-// - history_size : Size of the far end history used to estimate the
-// delay from. Used to allocate memory for history
-// specific buffers.
+// - handle : Instance that should be created.
+// - spectrum_size : Size of the spectrum used both in far-end and
+// near-end. Used to allocate memory for spectrum
+// specific buffers.
+// - max_delay : The maximum delay which can be estimated. Needed
+// to allocate memory for history buffers.
+// - lookahead : Amount of non-causal lookahead to use. This can detect
+// cases in which a near-end signal occurs before the
+// corresponding far-end signal. It will delay the
+// estimate for the current block by an equal amount,
+// and the returned values will be offset by it.
+//
+// A value of zero is the typical no-lookahead case. This
+// also represents the minimum delay which can be
+// estimated.
//
// Output:
-// - handle : Created instance
+// - handle : Created instance
//
int WebRtc_CreateDelayEstimator(void** handle,
int spectrum_size,
- int history_size);
+ int max_delay,
+ int lookahead);
// Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimator(...)
@@ -51,15 +60,17 @@
//
int WebRtc_InitDelayEstimator(void* handle);
-// Estimates and returns the delay between the far end and near end blocks.
+// Estimates and returns the delay between the far-end and near-end blocks. The
+// value will be offset by the lookahead (i.e. the lookahead should be
+// subtracted from the returned value).
// Inputs:
// - handle : Pointer to the delay estimation instance
-// - far_spectrum : Pointer to the far end spectrum data
-// - near_spectrum : Pointer to the near end spectrum data of the current
+// - far_spectrum : Pointer to the far-end spectrum data
+// - near_spectrum : Pointer to the near-end spectrum data of the current
// block
-// - spectrum_size : The size of the data arrays (same for both far and
-// near end)
-// - far_q : The Q-domain of the far end data
+// - spectrum_size : The size of the data arrays (same for both far- and
+// near-end)
+// - far_q : The Q-domain of the far-end data
// - vad_value : The VAD decision of the current block
//
// Output: