RNN VAD: FC layer isolated into rnn_fc.h/.cc
Refactoring done to more easily and cleanly add SIMD optimizations and
to remove `FullyConnectedLayer` from the RNN VAD api.
Minor improvements (readability, API):
- `FullyConnectedLayer` now takes an `ActivationFunction` enum instead of
a function view
- SSE2 optimization moved into `FullyConnectedLayer::ComputeOutputSse2`
- layer name added for improved logs
Bug: webrtc:10480
Change-Id: Ida4903a67655e19ef0464f378c433c1f6e96dca7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/195444
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32766}
diff --git a/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
index 4351afd..c57971a 100644
--- a/modules/audio_processing/agc2/rnn_vad/BUILD.gn
+++ b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
@@ -24,6 +24,7 @@
deps = [
":rnn_vad_common",
+ ":rnn_vad_layers",
":rnn_vad_lp_residual",
":rnn_vad_pitch",
":rnn_vad_sequence_buffer",
@@ -78,6 +79,24 @@
]
}
+rtc_source_set("rnn_vad_layers") {
+ sources = [
+ "rnn_fc.cc",
+ "rnn_fc.h",
+ ]
+ deps = [
+ ":rnn_vad_common",
+ "..:cpu_features",
+ "../../../../api:array_view",
+ "../../../../api:function_view",
+ "../../../../rtc_base:checks",
+ "../../../../rtc_base:safe_conversions",
+ "../../../../rtc_base/system:arch",
+ "//third_party/rnnoise:rnn_vad",
+ ]
+ absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
+}
+
rtc_source_set("vector_math") {
sources = [ "vector_math.h" ]
deps = [
@@ -221,6 +240,7 @@
"pitch_search_internal_unittest.cc",
"pitch_search_unittest.cc",
"ring_buffer_unittest.cc",
+ "rnn_fc_unittest.cc",
"rnn_unittest.cc",
"rnn_vad_unittest.cc",
"sequence_buffer_unittest.cc",
@@ -233,6 +253,7 @@
":rnn_vad",
":rnn_vad_auto_correlation",
":rnn_vad_common",
+ ":rnn_vad_layers",
":rnn_vad_lp_residual",
":rnn_vad_pitch",
":rnn_vad_ring_buffer",
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.cc b/modules/audio_processing/agc2/rnn_vad/rnn.cc
index 1c9b736..9d6d28f 100644
--- a/modules/audio_processing/agc2/rnn_vad/rnn.cc
+++ b/modules/audio_processing/agc2/rnn_vad/rnn.cc
@@ -60,37 +60,6 @@
return x < 0.f ? 0.f : x;
}
-std::vector<float> GetScaledParams(rtc::ArrayView<const int8_t> params) {
- std::vector<float> scaled_params(params.size());
- std::transform(params.begin(), params.end(), scaled_params.begin(),
- [](int8_t x) -> float {
- return rnnoise::kWeightsScale * static_cast<float>(x);
- });
- return scaled_params;
-}
-
-// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this
-// function to improve setup time.
-// Casts and scales |weights| and re-arranges the layout.
-std::vector<float> GetPreprocessedFcWeights(
- rtc::ArrayView<const int8_t> weights,
- int output_size) {
- if (output_size == 1) {
- return GetScaledParams(weights);
- }
- // Transpose, scale and cast.
- const int input_size = rtc::CheckedDivExact(
- rtc::dchecked_cast<int>(weights.size()), output_size);
- std::vector<float> w(weights.size());
- for (int o = 0; o < output_size; ++o) {
- for (int i = 0; i < input_size; ++i) {
- w[o * input_size + i] = rnnoise::kWeightsScale *
- static_cast<float>(weights[i * output_size + o]);
- }
- }
- return w;
-}
-
constexpr int kNumGruGates = 3; // Update, reset, output.
// TODO(bugs.chromium.org/10480): Hard-coded optimized layout and remove this
@@ -202,106 +171,8 @@
}
}
-// Fully connected layer un-optimized implementation.
-void ComputeFullyConnectedLayerOutput(
- int input_size,
- int output_size,
- rtc::ArrayView<const float> input,
- rtc::ArrayView<const float> bias,
- rtc::ArrayView<const float> weights,
- rtc::FunctionView<float(float)> activation_function,
- rtc::ArrayView<float> output) {
- RTC_DCHECK_EQ(input.size(), input_size);
- RTC_DCHECK_EQ(bias.size(), output_size);
- RTC_DCHECK_EQ(weights.size(), input_size * output_size);
- for (int o = 0; o < output_size; ++o) {
- output[o] = bias[o];
- // TODO(bugs.chromium.org/9076): Benchmark how different layouts for
- // |weights_| change the performance across different platforms.
- for (int i = 0; i < input_size; ++i) {
- output[o] += input[i] * weights[o * input_size + i];
- }
- output[o] = activation_function(output[o]);
- }
-}
-
-#if defined(WEBRTC_ARCH_X86_FAMILY)
-// Fully connected layer SSE2 implementation.
-void ComputeFullyConnectedLayerOutputSse2(
- int input_size,
- int output_size,
- rtc::ArrayView<const float> input,
- rtc::ArrayView<const float> bias,
- rtc::ArrayView<const float> weights,
- rtc::FunctionView<float(float)> activation_function,
- rtc::ArrayView<float> output) {
- RTC_DCHECK_EQ(input.size(), input_size);
- RTC_DCHECK_EQ(bias.size(), output_size);
- RTC_DCHECK_EQ(weights.size(), input_size * output_size);
- const int input_size_by_4 = input_size >> 2;
- const int offset = input_size & ~3;
- __m128 sum_wx_128;
- const float* v = reinterpret_cast<const float*>(&sum_wx_128);
- for (int o = 0; o < output_size; ++o) {
- // Perform 128 bit vector operations.
- sum_wx_128 = _mm_set1_ps(0);
- const float* x_p = input.data();
- const float* w_p = weights.data() + o * input_size;
- for (int i = 0; i < input_size_by_4; ++i, x_p += 4, w_p += 4) {
- sum_wx_128 = _mm_add_ps(sum_wx_128,
- _mm_mul_ps(_mm_loadu_ps(x_p), _mm_loadu_ps(w_p)));
- }
- // Perform non-vector operations for any remaining items, sum up bias term
- // and results from the vectorized code, and apply the activation function.
- output[o] = activation_function(
- std::inner_product(input.begin() + offset, input.end(),
- weights.begin() + o * input_size + offset,
- bias[o] + v[0] + v[1] + v[2] + v[3]));
- }
-}
-#endif
-
} // namespace
-FullyConnectedLayer::FullyConnectedLayer(
- const int input_size,
- const int output_size,
- const rtc::ArrayView<const int8_t> bias,
- const rtc::ArrayView<const int8_t> weights,
- rtc::FunctionView<float(float)> activation_function,
- const AvailableCpuFeatures& cpu_features)
- : input_size_(input_size),
- output_size_(output_size),
- bias_(GetScaledParams(bias)),
- weights_(GetPreprocessedFcWeights(weights, output_size)),
- activation_function_(activation_function),
- cpu_features_(cpu_features) {
- RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits)
- << "Static over-allocation of fully-connected layers output vectors is "
- "not sufficient.";
- RTC_DCHECK_EQ(output_size_, bias_.size())
- << "Mismatching output size and bias terms array size.";
- RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size())
- << "Mismatching input-output size and weight coefficients array size.";
-}
-
-FullyConnectedLayer::~FullyConnectedLayer() = default;
-
-void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
-#if defined(WEBRTC_ARCH_X86_FAMILY)
- // TODO(bugs.chromium.org/10480): Add AVX2.
- if (cpu_features_.sse2) {
- ComputeFullyConnectedLayerOutputSse2(input_size_, output_size_, input,
- bias_, weights_, activation_function_,
- output_);
- return;
- }
-#endif
- // TODO(bugs.chromium.org/10480): Add Neon.
- ComputeFullyConnectedLayerOutput(input_size_, output_size_, input, bias_,
- weights_, activation_function_, output_);
-}
-
GatedRecurrentLayer::GatedRecurrentLayer(
const int input_size,
const int output_size,
@@ -346,8 +217,9 @@
kInputLayerOutputSize,
kInputDenseBias,
kInputDenseWeights,
- TansigApproximated,
- cpu_features),
+ ActivationFunction::kTansigApproximated,
+ cpu_features,
+ /*layer_name=*/"FC1"),
hidden_(kInputLayerOutputSize,
kHiddenLayerOutputSize,
kHiddenGruBias,
@@ -357,8 +229,9 @@
kOutputLayerOutputSize,
kOutputDenseBias,
kOutputDenseWeights,
- SigmoidApproximated,
- cpu_features) {
+ ActivationFunction::kSigmoidApproximated,
+ cpu_features,
+ /*layer_name=*/"FC2") {
// Input-output chaining size checks.
RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
<< "The input and the hidden layers sizes do not match.";
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.h b/modules/audio_processing/agc2/rnn_vad/rnn.h
index c886034..df99c3c 100644
--- a/modules/audio_processing/agc2/rnn_vad/rnn.h
+++ b/modules/audio_processing/agc2/rnn_vad/rnn.h
@@ -21,54 +21,15 @@
#include "api/function_view.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace rnn_vad {
-// Maximum number of units for an FC layer.
-constexpr int kFullyConnectedLayerMaxUnits = 24;
-
// Maximum number of units for a GRU layer.
constexpr int kGruLayerMaxUnits = 24;
-// Fully-connected layer with a custom activation function which owns the output
-// buffer.
-class FullyConnectedLayer {
- public:
- // Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`.
- FullyConnectedLayer(int input_size,
- int output_size,
- rtc::ArrayView<const int8_t> bias,
- rtc::ArrayView<const int8_t> weights,
- rtc::FunctionView<float(float)> activation_function,
- const AvailableCpuFeatures& cpu_features);
- FullyConnectedLayer(const FullyConnectedLayer&) = delete;
- FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
- ~FullyConnectedLayer();
-
- // Returns the size of the input vector.
- int input_size() const { return input_size_; }
- // Returns the pointer to the first element of the output buffer.
- const float* data() const { return output_.data(); }
- // Returns the size of the output buffer.
- int size() const { return output_size_; }
-
- // Computes the fully-connected layer output.
- void ComputeOutput(rtc::ArrayView<const float> input);
-
- private:
- const int input_size_;
- const int output_size_;
- const std::vector<float> bias_;
- const std::vector<float> weights_;
- rtc::FunctionView<float(float)> activation_function_;
- // The output vector of a recurrent layer has length equal to |output_size_|.
- // However, for efficiency, over-allocation is used.
- std::array<float, kFullyConnectedLayerMaxUnits> output_;
- const AvailableCpuFeatures cpu_features_;
-};
-
// Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as
// activation functions for the update/reset and output gates respectively. It
// owns the output buffer.
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc b/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc
new file mode 100644
index 0000000..2363317
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Defines WEBRTC_ARCH_X86_FAMILY, used below.
+#include "rtc_base/system/arch.h"
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+
+#include <algorithm>
+#include <numeric>
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "third_party/rnnoise/src/rnn_activations.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+std::vector<float> GetScaledParams(rtc::ArrayView<const int8_t> params) {
+ std::vector<float> scaled_params(params.size());
+ std::transform(params.begin(), params.end(), scaled_params.begin(),
+ [](int8_t x) -> float {
+ return ::rnnoise::kWeightsScale * static_cast<float>(x);
+ });
+ return scaled_params;
+}
+
+// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this
+// function to improve setup time.
+// Casts and scales `weights` and transposes them so that each output unit's
+std::vector<float> PreprocessWeights(rtc::ArrayView<const int8_t> weights,
+                                     int output_size) {
+  if (output_size == 1) {  // A single row needs scaling only, no transpose.
+    return GetScaledParams(weights);
+  }
+  // coefficients are contiguous: w[o * input_size + i] <- weights[i, o].
+  const int input_size = rtc::CheckedDivExact(
+      rtc::dchecked_cast<int>(weights.size()), output_size);
+  std::vector<float> w(weights.size());
+  for (int o = 0; o < output_size; ++o) {
+    for (int i = 0; i < input_size; ++i) {
+      w[o * input_size + i] = ::rnnoise::kWeightsScale *
+                              static_cast<float>(weights[i * output_size + o]);
+    }
+  }
+  return w;
+}
+
+// Returns the rnnoise implementation selected by `activation_function`.
+rtc::FunctionView<float(float)> GetActivationFunction(
+    ActivationFunction activation_function) {
+  switch (activation_function) {
+    case ActivationFunction::kTansigApproximated:
+      return ::rnnoise::TansigApproximated;
+    case ActivationFunction::kSigmoidApproximated:
+      return ::rnnoise::SigmoidApproximated;
+  }
+  RTC_CHECK_NOTREACHED();  // Invalid enum value; never fall off the end.
+}
+
+} // namespace
+
+FullyConnectedLayer::FullyConnectedLayer(
+ const int input_size,
+ const int output_size,
+ const rtc::ArrayView<const int8_t> bias,
+ const rtc::ArrayView<const int8_t> weights,
+ ActivationFunction activation_function,
+ const AvailableCpuFeatures& cpu_features,
+ absl::string_view layer_name)
+ : input_size_(input_size),
+ output_size_(output_size),
+ bias_(GetScaledParams(bias)),
+ weights_(PreprocessWeights(weights, output_size)),
+ cpu_features_(cpu_features),
+ activation_function_(GetActivationFunction(activation_function)) {
+ RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits)
+ << "Insufficient FC layer over-allocation (" << layer_name << ").";
+ RTC_DCHECK_EQ(output_size_, bias_.size())
+ << "Mismatching output size and bias terms array size (" << layer_name
+ << ").";
+ RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size())
+ << "Mismatching input-output size and weight coefficients array size ("
+ << layer_name << ").";
+}
+
+FullyConnectedLayer::~FullyConnectedLayer() = default;
+
+void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
+ RTC_DCHECK_EQ(input.size(), input_size_);
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+ // TODO(bugs.chromium.org/10480): Add AVX2.
+ if (cpu_features_.sse2) {
+ ComputeOutputSse2(input);
+ return;
+ }
+#endif  // defined(WEBRTC_ARCH_X86_FAMILY)
+ // TODO(bugs.chromium.org/10480): Add Neon.
+
+ // Un-optimized fallback: output = activation(bias + weights * input).
+ for (int o = 0; o < output_size_; ++o) {
+ output_[o] = bias_[o];
+ // TODO(bugs.chromium.org/9076): Benchmark how different layouts for
+ // `weights_` change the performance across different platforms.
+ for (int i = 0; i < input_size_; ++i) {
+ output_[o] += input[i] * weights_[o * input_size_ + i];
+ }
+ output_[o] = activation_function_(output_[o]);
+ }
+}
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+void FullyConnectedLayer::ComputeOutputSse2(rtc::ArrayView<const float> input) {
+  const int input_size_by_4 = input_size_ >> 2;
+  const int offset = input_size_ & ~3;
+  // Receives the 4 accumulator lanes via an explicit store (no type punning).
+  float v[4];
+  for (int o = 0; o < output_size_; ++o) {
+    // Perform 128 bit vector operations on row `o` of `weights_`.
+    __m128 sum_wx_128 = _mm_set1_ps(0);
+    const float* x_p = input.data();
+    const float* w_p = weights_.data() + o * input_size_;
+    for (int i = 0; i < input_size_by_4; ++i, x_p += 4, w_p += 4) {
+      sum_wx_128 = _mm_add_ps(sum_wx_128,
+                              _mm_mul_ps(_mm_loadu_ps(x_p), _mm_loadu_ps(w_p)));
+    }
+    _mm_storeu_ps(v, sum_wx_128);
+    // Perform non-vector operations for any remaining items, sum up bias term
+    // and results from the vectorized code, and apply the activation function.
+    output_[o] = activation_function_(
+        std::inner_product(input.begin() + offset, input.end(),
+                           weights_.begin() + o * input_size_ + offset,
+                           bias_[o] + v[0] + v[1] + v[2] + v[3]));
+  }
+}
+#endif  // defined(WEBRTC_ARCH_X86_FAMILY)
+
+} // namespace rnn_vad
+} // namespace webrtc
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_fc.h b/modules/audio_processing/agc2/rnn_vad/rnn_fc.h
new file mode 100644
index 0000000..d05d95c
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/rnn_fc.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_
+
+#include <array>
+#include <vector>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "api/function_view.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "rtc_base/system/arch.h"
+
+namespace webrtc {
+namespace rnn_vad {
+
+// Activation function for a neural network cell.
+enum class ActivationFunction { kTansigApproximated, kSigmoidApproximated };
+
+// Maximum number of units for an FC layer.
+constexpr int kFullyConnectedLayerMaxUnits = 24;
+
+// Fully-connected layer with an activation function selected via
+// `ActivationFunction`; it owns its output buffer.
+class FullyConnectedLayer {
+ public:
+ // Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`.
+ FullyConnectedLayer(int input_size,
+ int output_size,
+ rtc::ArrayView<const int8_t> bias,
+ rtc::ArrayView<const int8_t> weights,
+ ActivationFunction activation_function,
+ const AvailableCpuFeatures& cpu_features,
+ absl::string_view layer_name);
+ FullyConnectedLayer(const FullyConnectedLayer&) = delete;
+ FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
+ ~FullyConnectedLayer();
+
+ // Returns the size of the input vector.
+ int input_size() const { return input_size_; }
+ // Returns the pointer to the first element of the output buffer.
+ const float* data() const { return output_.data(); }
+ // Returns the number of valid elements in the output buffer.
+ int size() const { return output_size_; }
+
+ // Computes the output; `input` must have `input_size()` elements.
+ void ComputeOutput(rtc::ArrayView<const float> input);
+
+ private:
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+ void ComputeOutputSse2(rtc::ArrayView<const float> input);
+#endif  // defined(WEBRTC_ARCH_X86_FAMILY)
+
+ const int input_size_;
+ const int output_size_;
+ const std::vector<float> bias_;
+ const std::vector<float> weights_;
+ const AvailableCpuFeatures cpu_features_;
+ rtc::FunctionView<float(float)> activation_function_;
+ // Over-allocated buffer; only the first `output_size_` elements are used.
+ std::array<float, kFullyConnectedLayerMaxUnits> output_;
+};
+
+} // namespace rnn_vad
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc b/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc
new file mode 100644
index 0000000..1094832
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
+
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
+#include "modules/audio_processing/test/performance_timer.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/system/arch.h"
+#include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace test {
+namespace {
+
+using ::rnnoise::kInputDenseBias;
+using ::rnnoise::kInputDenseWeights;
+using ::rnnoise::kInputLayerInputSize;
+using ::rnnoise::kInputLayerOutputSize;
+
+// Fully connected layer test data.
+constexpr std::array<float, 42> kFullyConnectedInputVector = {
+ -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f,
+ -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f,
+ -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f,
+ -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f,
+ 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f,
+ -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f,
+ 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f};
+constexpr std::array<float, 24> kFullyConnectedExpectedOutput = {
+ -0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f,
+ -0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f,
+ 0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f,
+ 0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f};
+
+class RnnParametrization
+ : public ::testing::TestWithParam<AvailableCpuFeatures> {};
+
+// Checks that the output of a fully connected layer is within tolerance given
+// test input data.
+TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) {
+ FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize,
+ kInputDenseBias, kInputDenseWeights,
+ ActivationFunction::kTansigApproximated,
+ /*cpu_features=*/GetParam(),
+ /*layer_name=*/"FC");
+ fc.ComputeOutput(kFullyConnectedInputVector);
+ ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f);
+}
+
+TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) {
+ const AvailableCpuFeatures cpu_features = GetParam();
+ FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize,
+ kInputDenseBias, kInputDenseWeights,
+ ActivationFunction::kTansigApproximated, cpu_features,
+ /*layer_name=*/"FC");
+
+ constexpr int kNumTests = 10000;
+ ::webrtc::test::PerformanceTimer perf_timer(kNumTests);
+ for (int k = 0; k < kNumTests; ++k) {
+ perf_timer.StartTimer();
+ fc.ComputeOutput(kFullyConnectedInputVector);
+ perf_timer.StopTimer();
+ }
+ RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | "
+ << (perf_timer.GetDurationAverage() / 1000) << " +/- "
+ << (perf_timer.GetDurationStandardDeviation() / 1000)
+ << " ms";
+}
+
+// Finds the relevant CPU features combinations to test.
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+ std::vector<AvailableCpuFeatures> v;
+ v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
+ AvailableCpuFeatures available = GetAvailableCpuFeatures();
+ if (available.sse2) {
+ AvailableCpuFeatures features(
+ {/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+ v.push_back(features);
+ }
+ return v;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ RnnVadTest,
+ RnnParametrization,
+ ::testing::ValuesIn(GetCpuFeaturesToTest()),
+ [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
+ return info.param.ToString();
+ });
+
+} // namespace
+} // namespace test
+} // namespace rnn_vad
+} // namespace webrtc
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc
index 19e0afd..4f42d11 100644
--- a/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc
+++ b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc
@@ -20,9 +20,7 @@
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
-#include "rtc_base/system/arch.h"
#include "test/gtest.h"
-#include "third_party/rnnoise/src/rnn_activations.h"
#include "third_party/rnnoise/src/rnn_vad_weights.h"
namespace webrtc {
@@ -67,21 +65,6 @@
}
}
-// Fully connected layer test data.
-constexpr std::array<float, 42> kFullyConnectedInputVector = {
- -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f,
- -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f,
- -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f,
- -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f,
- 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f,
- -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f,
- 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f};
-constexpr std::array<float, 24> kFullyConnectedExpectedOutput = {
- -0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f,
- -0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f,
- 0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f,
- 0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f};
-
// Gated recurrent units layer test data.
constexpr int kGruInputSize = 5;
constexpr int kGruOutputSize = 4;
@@ -170,61 +153,6 @@
<< " ms";
}
-class RnnParametrization
- : public ::testing::TestWithParam<AvailableCpuFeatures> {};
-
-// Checks that the output of a fully connected layer is within tolerance given
-// test input data.
-TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) {
- FullyConnectedLayer fc(
- rnnoise::kInputLayerInputSize, rnnoise::kInputLayerOutputSize,
- rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
- rnnoise::TansigApproximated, /*cpu_features=*/GetParam());
- fc.ComputeOutput(kFullyConnectedInputVector);
- ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f);
-}
-
-TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) {
- const AvailableCpuFeatures cpu_features = GetParam();
- FullyConnectedLayer fc(rnnoise::kInputLayerInputSize,
- rnnoise::kInputLayerOutputSize,
- rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
- rnnoise::TansigApproximated, cpu_features);
-
- constexpr int kNumTests = 10000;
- ::webrtc::test::PerformanceTimer perf_timer(kNumTests);
- for (int k = 0; k < kNumTests; ++k) {
- perf_timer.StartTimer();
- fc.ComputeOutput(kFullyConnectedInputVector);
- perf_timer.StopTimer();
- }
- RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | "
- << (perf_timer.GetDurationAverage() / 1000) << " +/- "
- << (perf_timer.GetDurationStandardDeviation() / 1000)
- << " ms";
-}
-
-// Finds the relevant CPU features combinations to test.
-std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
- std::vector<AvailableCpuFeatures> v;
- v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
- AvailableCpuFeatures available = GetAvailableCpuFeatures();
- if (available.sse2) {
- AvailableCpuFeatures features(
- {/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
- v.push_back(features);
- }
- return v;
-}
-
-INSTANTIATE_TEST_SUITE_P(
- RnnVadTest,
- RnnParametrization,
- ::testing::ValuesIn(GetCpuFeaturesToTest()),
- [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
- return info.param.ToString();
- });
-
// Checks that the speech probability is zero with silence.
TEST(RnnVadTest, CheckZeroProbabilityWithSilence) {
RnnVad rnn_vad(GetAvailableCpuFeatures());