RNN VAD: LP residual optimized (part 4)
This CL removes the circular shift previously used to compute the
convolution in `ComputeLpResidual()`, which is now 4x faster (benchmarked
with the `RnnVadTest.DISABLED_ComputeLpResidualBenchmark` unit test).
Note that the `RnnVadTest.LpResidualPipelineBitExactness` unit test
is still passing.
Bug: webrtc:10480
Change-Id: Ia7767d9b57378c12c8ff31f58fea03905be5c5de
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/189964
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32491}
diff --git a/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
index 8b01122..fcf179c 100644
--- a/modules/audio_processing/agc2/rnn_vad/BUILD.gn
+++ b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
@@ -75,6 +75,7 @@
deps = [
"../../../../api:array_view",
"../../../../rtc_base:checks",
+ "../../../../rtc_base:safe_compare",
]
}
diff --git a/modules/audio_processing/agc2/rnn_vad/lp_residual.cc b/modules/audio_processing/agc2/rnn_vad/lp_residual.cc
index e62bcc4..f732b97 100644
--- a/modules/audio_processing/agc2/rnn_vad/lp_residual.cc
+++ b/modules/audio_processing/agc2/rnn_vad/lp_residual.cc
@@ -16,6 +16,7 @@
#include <numeric>
#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
namespace webrtc {
namespace rnn_vad {
@@ -117,19 +118,22 @@
rtc::ArrayView<const float, kNumLpcCoefficients> lpc_coeffs,
rtc::ArrayView<const float> x,
rtc::ArrayView<float> y) {
- RTC_DCHECK_LT(kNumLpcCoefficients, x.size());
+ RTC_DCHECK_GT(x.size(), kNumLpcCoefficients);
RTC_DCHECK_EQ(x.size(), y.size());
- std::array<float, kNumLpcCoefficients> input_chunk;
- input_chunk.fill(0.f);
- for (size_t i = 0; i < y.size(); ++i) {
- const float sum = std::inner_product(input_chunk.begin(), input_chunk.end(),
- lpc_coeffs.begin(), x[i]);
- // Circular shift and add a new sample.
- for (size_t j = kNumLpcCoefficients - 1; j > 0; --j)
- input_chunk[j] = input_chunk[j - 1];
- input_chunk[0] = x[i];
- // Copy result.
- y[i] = sum;
+ // The code below implements the following operation:
+ // y[i] = x[i] + dot_product({x[i - 1], ..., x[i - kNumLpcCoefficients]},
+ // lpc_coeffs)
+ // Edge case: i < kNumLpcCoefficients.
+ y[0] = x[0];
+ for (int i = 1; i < kNumLpcCoefficients; ++i) {
+ y[i] =
+ std::inner_product(x.crend() - i, x.crend(), lpc_coeffs.cbegin(), x[i]);
+ }
+ // Regular case.
+ auto last = x.crend();
+ for (int i = kNumLpcCoefficients; rtc::SafeLt(i, y.size()); ++i, --last) {
+ y[i] = std::inner_product(last - kNumLpcCoefficients, last,
+ lpc_coeffs.cbegin(), x[i]);
}
}
diff --git a/modules/audio_processing/agc2/rnn_vad/lp_residual.h b/modules/audio_processing/agc2/rnn_vad/lp_residual.h
index cddedca..2e54dd9 100644
--- a/modules/audio_processing/agc2/rnn_vad/lp_residual.h
+++ b/modules/audio_processing/agc2/rnn_vad/lp_residual.h
@@ -19,7 +19,7 @@
namespace rnn_vad {
// LPC inverse filter length.
-constexpr size_t kNumLpcCoefficients = 5;
+constexpr int kNumLpcCoefficients = 5;
// Given a frame |x|, computes a post-processed version of LPC coefficients
// tailored for pitch estimation.