/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <stddef.h>

#include <vector>

#include "api/array_view.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/noise_suppression_impl.h"
#include "modules/audio_processing/test/audio_buffer_tools.h"
#include "modules/audio_processing/test/bitexactness_tools.h"
#include "test/gtest.h"
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 18 | |
| 19 | namespace webrtc { |
| 20 | namespace { |
| 21 | |
// Number of capture frames pushed through the noise suppressor in each test.
// Declared as size_t so that it has the same signedness as the size_t frame
// counter it is compared against.
constexpr size_t kNumFramesToProcess = 1000;
| 23 | |
| 24 | // Process one frame of data and produce the output. |
| 25 | void ProcessOneFrame(int sample_rate_hz, |
| 26 | AudioBuffer* capture_buffer, |
| 27 | NoiseSuppressionImpl* noise_suppressor) { |
| 28 | if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| 29 | capture_buffer->SplitIntoFrequencyBands(); |
| 30 | } |
| 31 | |
| 32 | noise_suppressor->AnalyzeCaptureAudio(capture_buffer); |
| 33 | noise_suppressor->ProcessCaptureAudio(capture_buffer); |
| 34 | |
| 35 | if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| 36 | capture_buffer->MergeFrequencyBands(); |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | // Processes a specified amount of frames, verifies the results and reports |
| 41 | // any errors. |
| 42 | void RunBitexactnessTest(int sample_rate_hz, |
| 43 | size_t num_channels, |
| 44 | NoiseSuppressionImpl::Level level, |
| 45 | float speech_probability_reference, |
| 46 | rtc::ArrayView<const float> noise_estimate_reference, |
| 47 | rtc::ArrayView<const float> output_reference) { |
| 48 | rtc::CriticalSection crit_capture; |
| 49 | NoiseSuppressionImpl noise_suppressor(&crit_capture); |
| 50 | noise_suppressor.Initialize(num_channels, sample_rate_hz); |
| 51 | noise_suppressor.Enable(true); |
| 52 | noise_suppressor.set_level(level); |
| 53 | |
| 54 | int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); |
| 55 | const StreamConfig capture_config(sample_rate_hz, num_channels, false); |
| 56 | AudioBuffer capture_buffer( |
| 57 | capture_config.num_frames(), capture_config.num_channels(), |
| 58 | capture_config.num_frames(), capture_config.num_channels(), |
| 59 | capture_config.num_frames()); |
| 60 | test::InputAudioFile capture_file( |
| 61 | test::GetApmCaptureTestVectorFileName(sample_rate_hz)); |
| 62 | std::vector<float> capture_input(samples_per_channel * num_channels); |
| 63 | for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { |
| 64 | ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, |
| 65 | &capture_file, capture_input); |
| 66 | |
| 67 | test::CopyVectorToAudioBuffer(capture_config, capture_input, |
| 68 | &capture_buffer); |
| 69 | |
| 70 | ProcessOneFrame(sample_rate_hz, &capture_buffer, &noise_suppressor); |
| 71 | } |
| 72 | |
| 73 | // Extract test results. |
| 74 | std::vector<float> capture_output; |
| 75 | test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer, |
| 76 | &capture_output); |
| 77 | float speech_probability = noise_suppressor.speech_probability(); |
| 78 | std::vector<float> noise_estimate = noise_suppressor.NoiseEstimate(); |
| 79 | |
peah | 7ea928e | 2016-03-30 15:13:57 | [diff] [blame] | 80 | const float kVectorElementErrorBound = 1.0f / 32768.0f; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 81 | EXPECT_FLOAT_EQ(speech_probability_reference, speech_probability); |
peah | 7ea928e | 2016-03-30 15:13:57 | [diff] [blame] | 82 | EXPECT_TRUE(test::VerifyArray(noise_estimate_reference, noise_estimate, |
| 83 | kVectorElementErrorBound)); |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 84 | |
| 85 | // Compare the output with the reference. Only the first values of the output |
| 86 | // from last frame processed are compared in order not having to specify all |
| 87 | // preceeding frames as testvectors. As the algorithm being tested has a |
| 88 | // memory, testing only the last frame implicitly also tests the preceeding |
| 89 | // frames. |
peah | 7ea928e | 2016-03-30 15:13:57 | [diff] [blame] | 90 | EXPECT_TRUE(test::VerifyDeinterleavedArray( |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 91 | capture_config.num_frames(), capture_config.num_channels(), |
peah | 7ea928e | 2016-03-30 15:13:57 | [diff] [blame] | 92 | output_reference, capture_output, kVectorElementErrorBound)); |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 93 | } |
| 94 | |
| 95 | } // namespace |
| 96 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 97 | TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLow) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 98 | #if defined(WEBRTC_ARCH_ARM64) |
| 99 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 100 | const float kNoiseEstimateReference[] = |
| 101 | {1432.341431f, 3321.919922f, 7677.521973f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 102 | const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f}; |
| 103 | #elif defined(WEBRTC_ARCH_ARM) |
| 104 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 105 | const float kNoiseEstimateReference[] = |
| 106 | {1432.341431f, 3321.919922f, 7677.521973f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 107 | const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f}; |
| 108 | #else |
| 109 | const float kSpeechProbabilityReference = 0.73421317f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 110 | const float kNoiseEstimateReference[] = |
| 111 | {1175.266113f, 3289.305908f, 7532.991211f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 112 | const float kOutputReference[] = {0.003263f, 0.004402f, 0.004537f}; |
| 113 | #endif |
| 114 | |
| 115 | RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow, |
| 116 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 117 | kOutputReference); |
| 118 | } |
| 119 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 120 | TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLow) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 121 | #if defined(WEBRTC_ARCH_ARM64) |
| 122 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 123 | const float kNoiseEstimateReference[] = |
| 124 | {2534.461914f, 6277.638672f, 14367.499023f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 125 | const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f}; |
| 126 | #elif defined(WEBRTC_ARCH_ARM) |
| 127 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 128 | const float kNoiseEstimateReference[] = |
| 129 | {2534.461914f, 6277.638672f, 14367.499023f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 130 | const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f}; |
| 131 | #else |
| 132 | const float kSpeechProbabilityReference = 0.71672988f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 133 | const float kNoiseEstimateReference[] = |
| 134 | {2151.313965f, 6509.765137f, 15658.848633f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 135 | const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f}; |
| 136 | #endif |
| 137 | |
| 138 | RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kLow, |
| 139 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 140 | kOutputReference); |
| 141 | } |
| 142 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 143 | TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLow) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 144 | #if defined(WEBRTC_ARCH_ARM64) |
| 145 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 146 | const float kNoiseEstimateReference[] = |
| 147 | {2540.059082f, 6317.822754f, 14440.845703f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 148 | const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f}; |
| 149 | #elif defined(WEBRTC_ARCH_ARM) |
| 150 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 151 | const float kNoiseEstimateReference[] = |
| 152 | {2540.059082f, 6317.822754f, 14440.845703f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 153 | const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f}; |
| 154 | #else |
| 155 | const float kSpeechProbabilityReference = 0.67999554f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 156 | const float kNoiseEstimateReference[] = |
| 157 | {2149.780518f, 7076.936035f, 14939.945312f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 158 | const float kOutputReference[] = {0.001221f, 0.001984f, 0.002228f}; |
| 159 | #endif |
| 160 | |
| 161 | RunBitexactnessTest(32000, 1, NoiseSuppression::Level::kLow, |
| 162 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 163 | kOutputReference); |
| 164 | } |
| 165 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 166 | TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLow) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 167 | #if defined(WEBRTC_ARCH_ARM64) |
| 168 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 169 | const float kNoiseEstimateReference[] = |
| 170 | {2564.605713f, 6213.656250f, 13372.284180f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 171 | const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; |
| 172 | #elif defined(WEBRTC_ARCH_ARM) |
| 173 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 174 | const float kNoiseEstimateReference[] = |
| 175 | {2564.605713f, 6213.656250f, 13372.284180f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 176 | const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; |
| 177 | #else |
| 178 | const float kSpeechProbabilityReference = 0.70645678f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 179 | const float kNoiseEstimateReference[] = |
| 180 | {2168.783203f, 6902.895508f, 13190.677734f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 181 | const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f}; |
| 182 | #endif |
| 183 | |
| 184 | RunBitexactnessTest(48000, 1, NoiseSuppression::Level::kLow, |
| 185 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 186 | kOutputReference); |
| 187 | } |
| 188 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 189 | TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLow) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 190 | #if defined(WEBRTC_ARCH_ARM64) |
| 191 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 192 | const float kNoiseEstimateReference[] = |
| 193 | {9992.127930f, 12689.569336f, 11589.296875f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 194 | const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f, |
| 195 | -0.002441f, 0.000855f, -0.003204f}; |
| 196 | #elif defined(WEBRTC_ARCH_ARM) |
| 197 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 198 | const float kNoiseEstimateReference[] = |
| 199 | {10321.353516f, 12133.852539f, 10923.060547f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 200 | const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f, |
| 201 | -0.002472f, 0.000916f, -0.003235f}; |
| 202 | #else |
| 203 | const float kSpeechProbabilityReference = 0.67230678f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 204 | const float kNoiseEstimateReference[] = |
| 205 | {9771.250000f, 11329.377930f, 10503.052734f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 206 | const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f, |
| 207 | -0.002399f, 0.001018f, -0.003189f}; |
| 208 | #endif |
| 209 | |
| 210 | RunBitexactnessTest(16000, 2, NoiseSuppression::Level::kLow, |
| 211 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 212 | kOutputReference); |
| 213 | } |
| 214 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 215 | TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerate) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 216 | #if defined(WEBRTC_ARCH_ARM64) |
| 217 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 218 | const float kNoiseEstimateReference[] = |
| 219 | {2057.085938f, 7601.055176f, 19666.187500f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 220 | const float kOutputReference[] = {0.004669f, 0.005524f, 0.005432f}; |
| 221 | #elif defined(WEBRTC_ARCH_ARM) |
| 222 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 223 | const float kNoiseEstimateReference[] = |
| 224 | {2244.497803f, 6864.164062f, 16726.523438f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 225 | const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f}; |
| 226 | #else |
| 227 | const float kSpeechProbabilityReference = 0.70897013f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 228 | const float kNoiseEstimateReference[] = |
| 229 | {2171.490723f, 6553.567871f, 15626.562500f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 230 | const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f}; |
| 231 | #endif |
| 232 | |
| 233 | RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kModerate, |
| 234 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 235 | kOutputReference); |
| 236 | } |
| 237 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 238 | TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHigh) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 239 | #if defined(WEBRTC_ARCH_ARM64) |
| 240 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 241 | const float kNoiseEstimateReference[] = |
| 242 | {2095.148193f, 7698.553711f, 19689.533203f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 243 | const float kOutputReference[] = {0.004639f, 0.005402f, 0.005310f}; |
| 244 | #elif defined(WEBRTC_ARCH_ARM) |
| 245 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 246 | const float kNoiseEstimateReference[] = |
| 247 | {2282.515625f, 6984.408203f, 16920.960938f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 248 | const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f}; |
| 249 | #else |
| 250 | const float kSpeechProbabilityReference = 0.70106733f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 251 | const float kNoiseEstimateReference[] = |
| 252 | {2224.968506f, 6712.025879f, 15785.087891f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 253 | const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f}; |
| 254 | #endif |
| 255 | |
| 256 | RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kHigh, |
| 257 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 258 | kOutputReference); |
| 259 | } |
| 260 | |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 261 | TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) { |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 262 | #if defined(WEBRTC_ARCH_ARM64) |
| 263 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 264 | const float kNoiseEstimateReference[] = |
| 265 | {2677.733398f, 6186.987305f, 14365.744141f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 266 | const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f}; |
| 267 | #elif defined(WEBRTC_ARCH_ARM) |
| 268 | const float kSpeechProbabilityReference = -4.0f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 269 | const float kNoiseEstimateReference[] = |
| 270 | {2677.733398f, 6186.987305f, 14365.744141f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 271 | const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f}; |
| 272 | #else |
| 273 | const float kSpeechProbabilityReference = 0.70281971f; |
aluebs | 853c840 | 2016-04-05 17:03:34 | [diff] [blame] | 274 | const float kNoiseEstimateReference[] = |
| 275 | {2254.347900f, 6723.699707f, 15771.625977f}; |
peah | 5585001 | 2016-03-20 01:01:09 | [diff] [blame] | 276 | const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f}; |
| 277 | #endif |
| 278 | |
| 279 | RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kVeryHigh, |
| 280 | kSpeechProbabilityReference, kNoiseEstimateReference, |
| 281 | kOutputReference); |
| 282 | } |
| 283 | |
| 284 | } // namespace webrtc |