Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" |
| 12 | |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 13 | #include <algorithm> |
| 14 | #include <array> |
| 15 | #include <complex> |
| 16 | #include <numeric> |
| 17 | #include <vector> |
| 18 | |
| 19 | #include "api/array_view.h" |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 20 | #include "modules/audio_processing/agc2/rnn_vad/test_utils.h" |
Alessio Bazzica | 4d4cce8 | 2019-04-10 09:11:09 | [diff] [blame] | 21 | #include "modules/audio_processing/utility/pffft_wrapper.h" |
Alessio Bazzica | f622ba7 | 2020-10-29 19:50:13 | [diff] [blame] | 22 | #include "rtc_base/numerics/safe_compare.h" |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 23 | // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. |
| 24 | // #include "test/fpe_observer.h" |
| 25 | #include "test/gtest.h" |
| 26 | |
| 27 | namespace webrtc { |
| 28 | namespace rnn_vad { |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 29 | namespace { |
| 30 | |
Artem Titov | 0b48930 | 2021-07-28 18:50:03 | [diff] [blame] | 31 | // Generates the values for the array named `kOpusBandWeights24kHz20ms` in the |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 32 | // anonymous namespace of the .cc file, which is the array of FFT coefficient |
| 33 | // weights for the Opus scale triangular filters. |
| 34 | std::vector<float> ComputeTriangularFiltersWeights() { |
| 35 | constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms(); |
| 36 | const auto& v = kOpusScaleNumBins24kHz20ms; // Alias. |
Alessio Bazzica | f622ba7 | 2020-10-29 19:50:13 | [diff] [blame] | 37 | const int num_weights = std::accumulate(kOpusScaleNumBins24kHz20ms.begin(), |
| 38 | kOpusScaleNumBins24kHz20ms.end(), 0); |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 39 | std::vector<float> weights(num_weights); |
Alessio Bazzica | f622ba7 | 2020-10-29 19:50:13 | [diff] [blame] | 40 | int next_fft_coeff_index = 0; |
| 41 | for (int band = 0; rtc::SafeLt(band, v.size()); ++band) { |
| 42 | const int band_size = v[band]; |
| 43 | for (int j = 0; rtc::SafeLt(j, band_size); ++j) { |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 44 | weights[next_fft_coeff_index + j] = static_cast<float>(j) / band_size; |
| 45 | } |
| 46 | next_fft_coeff_index += band_size; |
| 47 | } |
| 48 | return weights; |
| 49 | } |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 50 | |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 51 | // Checks that the values returned by GetOpusScaleNumBins24kHz20ms() match the |
| 52 | // Opus scale frequency boundaries. |
| 53 | TEST(RnnVadTest, TestOpusScaleBoundaries) { |
| 54 | constexpr int kBandFrequencyBoundariesHz[kNumBands - 1] = { |
| 55 | 200, 400, 600, 800, 1000, 1200, 1400, 1600, 2000, 2400, 2800, |
| 56 | 3200, 4000, 4800, 5600, 6800, 8000, 9600, 12000, 15600, 20000}; |
| 57 | constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms(); |
| 58 | int prev = 0; |
Alessio Bazzica | f622ba7 | 2020-10-29 19:50:13 | [diff] [blame] | 59 | for (int i = 0; rtc::SafeLt(i, kOpusScaleNumBins24kHz20ms.size()); ++i) { |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 60 | int boundary = |
| 61 | kBandFrequencyBoundariesHz[i] * kFrameSize20ms24kHz / kSampleRate24kHz; |
| 62 | EXPECT_EQ(kOpusScaleNumBins24kHz20ms[i], boundary - prev); |
| 63 | prev = boundary; |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 64 | } |
| 65 | } |
| 66 | |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 67 | // Checks that the computed triangular filters weights for the Opus scale are |
| 68 | // monotonic withing each Opus band. This test should only be enabled when |
Artem Titov | 0b48930 | 2021-07-28 18:50:03 | [diff] [blame] | 69 | // ComputeTriangularFiltersWeights() is changed and `kOpusBandWeights24kHz20ms` |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 70 | // is updated accordingly. |
| 71 | TEST(RnnVadTest, DISABLED_TestOpusScaleWeights) { |
| 72 | auto weights = ComputeTriangularFiltersWeights(); |
Alessio Bazzica | f622ba7 | 2020-10-29 19:50:13 | [diff] [blame] | 73 | int i = 0; |
| 74 | for (int band_size : GetOpusScaleNumBins24kHz20ms()) { |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 75 | SCOPED_TRACE(band_size); |
| 76 | rtc::ArrayView<float> band_weights(weights.data() + i, band_size); |
| 77 | float prev = -1.f; |
| 78 | for (float weight : band_weights) { |
| 79 | EXPECT_LT(prev, weight); |
| 80 | prev = weight; |
| 81 | } |
| 82 | i += band_size; |
| 83 | } |
| 84 | } |
| 85 | |
Alessio Bazzica | c0c7d36 | 2019-04-23 14:34:22 | [diff] [blame] | 86 | // Checks that the computed band-wise auto-correlation is non-negative for a |
| 87 | // simple input vector of FFT coefficients. |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 88 | TEST(RnnVadTest, SpectralCorrelatorValidOutput) { |
Alessio Bazzica | c0c7d36 | 2019-04-23 14:34:22 | [diff] [blame] | 89 | // Input: vector of (1, 1j) values. |
Alessio Bazzica | 4d4cce8 | 2019-04-10 09:11:09 | [diff] [blame] | 90 | Pffft fft(kFrameSize20ms24kHz, Pffft::FftType::kReal); |
| 91 | auto in = fft.CreateBuffer(); |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 92 | std::array<float, kOpusBands24kHz> out; |
Alessio Bazzica | 4d4cce8 | 2019-04-10 09:11:09 | [diff] [blame] | 93 | auto in_view = in->GetView(); |
| 94 | std::fill(in_view.begin(), in_view.end(), 1.f); |
| 95 | in_view[1] = 0.f; // Nyquist frequency. |
Alessio Bazzica | c0c7d36 | 2019-04-23 14:34:22 | [diff] [blame] | 96 | // Compute and check output. |
| 97 | SpectralCorrelator e; |
Alessio Bazzica | 4d4cce8 | 2019-04-10 09:11:09 | [diff] [blame] | 98 | e.ComputeAutoCorrelation(in_view, out); |
Alessio Bazzica | f622ba7 | 2020-10-29 19:50:13 | [diff] [blame] | 99 | for (int i = 0; i < kOpusBands24kHz; ++i) { |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 100 | SCOPED_TRACE(i); |
| 101 | EXPECT_GT(out[i], 0.f); |
| 102 | } |
| 103 | } |
| 104 | |
Alessio Bazzica | c0c7d36 | 2019-04-23 14:34:22 | [diff] [blame] | 105 | // Checks that the computed smoothed log magnitude spectrum is within tolerance |
| 106 | // given hard-coded test input data. |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 107 | TEST(RnnVadTest, ComputeSmoothedLogMagnitudeSpectrumWithinTolerance) { |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 108 | constexpr std::array<float, kNumBands> input = { |
| 109 | {86.060539245605f, 275.668334960938f, 43.406528472900f, 6.541896820068f, |
| 110 | 17.964015960693f, 8.090919494629f, 1.261920094490f, 1.212702631950f, |
| 111 | 1.619154453278f, 0.508935272694f, 0.346316039562f, 0.237035423517f, |
| 112 | 0.172424271703f, 0.271657168865f, 0.126088857651f, 0.139967113733f, |
| 113 | 0.207200810313f, 0.155893072486f, 0.091090843081f, 0.033391401172f, |
| 114 | 0.013879744336f, 0.011973354965f}}; |
| 115 | constexpr std::array<float, kNumBands> expected_output = { |
| 116 | {1.934854507446f, 2.440402746201f, 1.637655138969f, 0.816367030144f, |
| 117 | 1.254645109177f, 0.908534288406f, 0.104459829628f, 0.087320849299f, |
| 118 | 0.211962252855f, -0.284886807203f, -0.448164641857f, -0.607240796089f, |
| 119 | -0.738917350769f, -0.550279200077f, -0.866177439690f, -0.824003994465f, |
| 120 | -0.663138568401f, -0.780171751976f, -0.995288193226f, -1.362596273422f, |
| 121 | -1.621970295906f, -1.658103585243f}}; |
| 122 | std::array<float, kNumBands> computed_output; |
| 123 | { |
| 124 | // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. |
| 125 | // FloatingPointExceptionObserver fpe_observer; |
Alessio Bazzica | 4a53766 | 2019-04-10 07:36:21 | [diff] [blame] | 126 | ComputeSmoothedLogMagnitudeSpectrum(input, computed_output); |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 127 | ExpectNearAbsolute(expected_output, computed_output, 1e-5f); |
| 128 | } |
| 129 | } |
| 130 | |
Alessio Bazzica | c0c7d36 | 2019-04-23 14:34:22 | [diff] [blame] | 131 | // Checks that the computed DCT is within tolerance given hard-coded test input |
| 132 | // data. |
| 133 | TEST(RnnVadTest, ComputeDctWithinTolerance) { |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 134 | constexpr std::array<float, kNumBands> input = { |
| 135 | {0.232155621052f, 0.678957760334f, 0.220818966627f, -0.077363930643f, |
| 136 | -0.559227049351f, 0.432545185089f, 0.353900641203f, 0.398993015289f, |
| 137 | 0.409774333239f, 0.454977899790f, 0.300520688295f, -0.010286616161f, |
| 138 | 0.272525429726f, 0.098067551851f, 0.083649002016f, 0.046226885170f, |
| 139 | -0.033228103071f, 0.144773483276f, -0.117661058903f, -0.005628800020f, |
| 140 | -0.009547689930f, -0.045382082462f}}; |
| 141 | constexpr std::array<float, kNumBands> expected_output = { |
| 142 | {0.697072803974f, 0.442710995674f, -0.293156713247f, -0.060711503029f, |
| 143 | 0.292050391436f, 0.489301353693f, 0.402255415916f, 0.134404733777f, |
| 144 | -0.086305990815f, -0.199605688453f, -0.234511867166f, -0.413774639368f, |
| 145 | -0.388507157564f, -0.032798115164f, 0.044605545700f, 0.112466648221f, |
| 146 | -0.050096966326f, 0.045971218497f, -0.029815061018f, -0.410366982222f, |
| 147 | -0.209233760834f, -0.128037497401f}}; |
Alessio Bazzica | 2f1e6d4 | 2018-05-15 13:52:38 | [diff] [blame] | 148 | auto dct_table = ComputeDctTable(); |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 149 | std::array<float, kNumBands> computed_output; |
| 150 | { |
| 151 | // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. |
| 152 | // FloatingPointExceptionObserver fpe_observer; |
Alessio Bazzica | 2f1e6d4 | 2018-05-15 13:52:38 | [diff] [blame] | 153 | ComputeDct(input, dct_table, computed_output); |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 154 | ExpectNearAbsolute(expected_output, computed_output, 1e-5f); |
| 155 | } |
| 156 | } |
| 157 | |
Alessio Bazzica | bb1a28d | 2020-12-07 16:02:22 | [diff] [blame] | 158 | } // namespace |
Alessio Bazzica | 0bd0a3f | 2018-05-08 09:10:45 | [diff] [blame] | 159 | } // namespace rnn_vad |
| 160 | } // namespace webrtc |