blob: 69c499296fbab86b02687423458260c5c0bcde57 [file] [log] [blame] [edit]
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/audio_buffer.h"
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <numbers>
#include <tuple>
#include <vector>
#include "api/audio/audio_processing.h"
#include "api/audio/audio_view.h"
#include "rtc_base/checks.h"
#include "test/gtest.h"
#include "test/testsupport/rtc_expect_death.h"
namespace webrtc {
namespace {
// Records a non-fatal test failure unless `ab` currently reports exactly
// `num_channels` channels.
void ExpectNumChannels(const AudioBuffer& ab, size_t num_channels) {
  EXPECT_EQ(ab.num_channels(), num_channels);
}
void FillChannelWith100HzSine(int channel, float amplitude, AudioBuffer& ab) {
constexpr float kPi = std::numbers::pi;
float sample_rate_hz;
if (ab.num_frames() == 160) {
sample_rate_hz = 16000.0f;
} else if (ab.num_frames() == 320) {
sample_rate_hz = 32000.0f;
} else {
sample_rate_hz = 48000.0f;
}
constexpr float kFrequencyHz = 100.0f;
for (size_t i = 0; i < ab.num_frames(); ++i) {
ab.channels()[channel][i] =
amplitude * std::sin(2 * kPi * kFrequencyHz / sample_rate_hz * i);
}
}
// Writes one 10 ms frame of a 100 Hz sine with peak value `amplitude` into
// `stacked_data[channel]`, starting at phase zero.
//
// `stacked_data` holds one pointer per channel (deinterleaved layout). The
// number of samples written is 160 for 16000 Hz, 320 for 32000 Hz and 480 for
// every other `sample_rate_hz`; the destination channel must have room for
// that many samples. Other channels are left untouched.
void FillChannelWith100HzSine(int sample_rate_hz,
                              int channel,
                              float amplitude,
                              float* const* stacked_data) {
  constexpr float kPi = std::numbers::pi_v<float>;
  constexpr float kFrequencyHz = 100.0f;

  int num_samples = 480;
  switch (sample_rate_hz) {
    case 16000:
      num_samples = 160;
      break;
    case 32000:
      num_samples = 320;
      break;
    default:
      break;
  }

  // Phase advance per sample, in radians.
  const float phase_step = 2 * kPi * kFrequencyHz / sample_rate_hz;
  float* const samples = stacked_data[channel];
  for (int n = 0; n < num_samples; ++n) {
    samples[n] = amplitude * std::sin(phase_step * n);
  }
}
// Writes one 10 ms frame of a 100 Hz sine into channel `channel` of the
// interleaved int16 buffer `interleaved_data`, which has `num_channels`
// channels per frame. Samples of other channels are left untouched.
//
// The number of frames written is 160 for 16000 Hz, 320 for 32000 Hz and 480
// for every other `sample_rate_hz`.
//
// Each sample is `amplitude * sin(...)` converted to int16_t, i.e. truncated
// toward zero. `amplitude` is therefore expressed in int16 units: any
// amplitude with magnitude below 1 produces an all-zero channel.
void FillChannelWith100HzSine(int sample_rate_hz,
                              int num_channels,
                              int channel,
                              float amplitude,
                              int16_t* const interleaved_data) {
  constexpr float kPi = std::numbers::pi_v<float>;
  constexpr float kFrequencyHz = 100.0f;

  int num_frames = 480;
  switch (sample_rate_hz) {
    case 16000:
      num_frames = 160;
      break;
    case 32000:
      num_frames = 320;
      break;
    default:
      break;
  }

  // Phase advance per frame, in radians.
  const float phase_step = 2 * kPi * kFrequencyHz / sample_rate_hz;
  int index = channel;
  for (int n = 0; n < num_frames; ++n) {
    const float value = amplitude * std::sin(phase_step * n);
    interleaved_data[index] = static_cast<int16_t>(value);
    index += num_channels;
  }
}
// Selects the pair of sine amplitudes (channel 0, channel 1) that the
// StereoCapture test feeds into the AudioBuffer.
enum class DownmixingSignalVariant {
// Both channels nearly silent: amplitudes 0.0001 and 0.0002.
kInactive,
// Channel 0 nearly silent, channel 1 loud: amplitudes 0.0001 and 0.8.
kChannel0Inactive,
// Channel 0 much weaker than channel 1: amplitudes 0.01 and 0.8.
kVeryImbalanced,
// Both channels at similar level: amplitudes 0.7 and 0.8.
kBalanced
};
} // namespace
class AudioBufferMonoInputDownmixingTest
: public ::testing::Test,
public ::testing::WithParamInterface<
std::tuple<int,
int,
AudioProcessing::Config::Pipeline::DownmixMethod>> {
protected:
int Rate1() const { return std::get<0>(GetParam()); }
int Rate2() const { return std::get<1>(GetParam()); }
AudioProcessing::Config::Pipeline::DownmixMethod DownmixingMethod() const {
return std::get<2>(GetParam());
}
};
// Runs the mono-input downmixing tests for every combination of two sample
// rates (16000, 32000 or 48000 each) and both downmix methods: 3 * 3 * 2 = 18
// parameter sets. The tuple order must match the fixture's accessors.
INSTANTIATE_TEST_SUITE_P(
AudioBufferTests,
AudioBufferMonoInputDownmixingTest,
::testing::Combine(
::testing::Values(16000, 32000, 48000),
::testing::Values(16000, 32000, 48000),
::testing::Values(
AudioProcessing::Config::Pipeline::DownmixMethod::kAverageChannels,
AudioProcessing::Config::Pipeline::DownmixMethod::kAdaptive)));
class AudioBufferStereoInputDownmixingTest
: public ::testing::Test,
public ::testing::WithParamInterface<
std::tuple<int,
int,
int,
AudioProcessing::Config::Pipeline::DownmixMethod,
bool,
bool,
DownmixingSignalVariant>> {
protected:
int Rate1() const { return std::get<0>(GetParam()); }
int Rate2() const { return std::get<1>(GetParam()); }
size_t NumChannels() const { return std::get<2>(GetParam()); }
AudioProcessing::Config::Pipeline::DownmixMethod DownmixingMethod() const {
return std::get<3>(GetParam());
}
bool OnlyInitialFrames() const { return std::get<4>(GetParam()); }
bool UseFloatInterface() const { return std::get<5>(GetParam()); }
DownmixingSignalVariant SignalVariant() const {
return std::get<6>(GetParam());
}
};
// Runs the stereo-input downmixing test over the full cross product of:
// two sample rates (16000, 32000 or 48000 each), 1 or 2 buffer channels, both
// downmix methods, initial-frames-only off/on, float interface off/on, and
// all four signal variants: 3 * 3 * 2 * 2 * 2 * 2 * 4 = 576 parameter sets.
// The tuple order must match the fixture's accessors.
INSTANTIATE_TEST_SUITE_P(
AudioBufferTests,
AudioBufferStereoInputDownmixingTest,
::testing::Combine(
::testing::Values(16000, 32000, 48000),
::testing::Values(16000, 32000, 48000),
::testing::Values(1, 2),
::testing::Values(
AudioProcessing::Config::Pipeline::DownmixMethod::kAverageChannels,
AudioProcessing::Config::Pipeline::DownmixMethod::kAdaptive),
::testing::Values(false, true),
::testing::Values(false, true),
::testing::Values(DownmixingSignalVariant::kInactive,
DownmixingSignalVariant::kChannel0Inactive,
DownmixingSignalVariant::kVeryImbalanced,
DownmixingSignalVariant::kBalanced)
));
class AudioBufferChannelCountAndTwoRatesTest
: public ::testing::Test,
public ::testing::WithParamInterface<std::tuple<int, int, int>> {
protected:
int Rate1() const { return std::get<0>(GetParam()); }
int Rate2() const { return std::get<1>(GetParam()); }
int NumChannels() const { return std::get<2>(GetParam()); }
};
// Runs the two-rate tests for every combination of two sample rates (16000,
// 32000 or 48000 each) and 1 or 2 channels: 3 * 3 * 2 = 18 parameter sets.
INSTANTIATE_TEST_SUITE_P(
AudioBufferTests,
AudioBufferChannelCountAndTwoRatesTest,
::testing::Combine(::testing::Values(16000, 32000, 48000),
::testing::Values(16000, 32000, 48000),
::testing::Values(1, 2)));
class AudioBufferChannelCountAndOneRateTest
: public ::testing::Test,
public ::testing::WithParamInterface<std::tuple<int, int>> {
protected:
int Rate() const { return std::get<0>(GetParam()); }
int NumChannels() const { return std::get<1>(GetParam()); }
};
// Runs the single-rate tests for every combination of one sample rate
// (16000, 32000 or 48000) and 1 or 2 channels: 3 * 2 = 6 parameter sets.
INSTANTIATE_TEST_SUITE_P(
AudioBufferTests,
AudioBufferChannelCountAndOneRateTest,
::testing::Combine(::testing::Values(16000, 32000, 48000),
::testing::Values(1, 2)));
// Checks that set_num_channels() changes the channel count the buffer
// reports and that RestoreNumChannels() brings back the constructed count.
TEST(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels) {
  constexpr size_t kRateHz = 48000u;
  AudioBuffer ab(kRateHz, 2, kRateHz, 2, kRateHz, 2);
  ExpectNumChannels(ab, 2);

  // Temporarily shrink to mono.
  ab.set_num_channels(1);
  ExpectNumChannels(ab, 1);

  // Back to the stereo layout the buffer was created with.
  ab.RestoreNumChannels();
  ExpectNumChannels(ab, 2);
}
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// Checks that asking a mono buffer for two channels terminates the process
// with a message mentioning "num_channels".
TEST(AudioBufferDeathTest, SetNumChannelsDeathTest) {
  constexpr size_t kRateHz = 48000u;
  AudioBuffer ab(kRateHz, 1, kRateHz, 1, kRateHz, 1);
  RTC_EXPECT_DEATH(ab.set_num_channels(2), "num_channels");
}
#endif
// Copies between two identically configured buffers and checks that every
// sample arrives unchanged.
TEST_P(AudioBufferChannelCountAndOneRateTest, CopyWithoutResampling) {
  AudioBuffer source(Rate(), NumChannels(), Rate(), NumChannels(), Rate(),
                     NumChannels());
  AudioBuffer destination(Rate(), NumChannels(), Rate(), NumChannels(), Rate(),
                          NumChannels());

  // Give every sample a distinct, position-dependent value: frame index plus
  // channel index.
  for (size_t ch = 0; ch < source.num_channels(); ++ch) {
    float* const channel = source.channels()[ch];
    for (size_t i = 0; i < source.num_frames(); ++i) {
      channel[i] = static_cast<float>(i + ch);
    }
  }

  source.CopyTo(&destination);

  // The destination must hold exactly the pattern written above.
  for (size_t ch = 0; ch < destination.num_channels(); ++ch) {
    for (size_t i = 0; i < destination.num_frames(); ++i) {
      const float expected = static_cast<float>(i + ch);
      EXPECT_EQ(destination.channels()[ch][i], expected);
    }
  }
}
// Copies a 100 Hz sine from a buffer running at Rate1() into a buffer running
// at Rate2() and checks that the signal energy is preserved to within 4%.
TEST_P(AudioBufferChannelCountAndTwoRatesTest, CopyWithResampling) {
  AudioBuffer source(Rate1(), NumChannels(), Rate1(), NumChannels(), Rate2(),
                     NumChannels());
  AudioBuffer destination(Rate2(), NumChannels(), Rate2(), NumChannels(),
                          Rate2(), NumChannels());

  // Sum of squared samples over all channels, accumulated channel by channel.
  const auto total_energy = [](AudioBuffer& buffer) {
    float energy = 0.0f;
    for (size_t ch = 0; ch < buffer.num_channels(); ++ch) {
      const float* const channel = buffer.channels()[ch];
      for (size_t i = 0; i < buffer.num_frames(); ++i) {
        energy += channel[i] * channel[i];
      }
    }
    return energy;
  };

  for (size_t ch = 0; ch < source.num_channels(); ++ch) {
    FillChannelWith100HzSine(ch, 1.0f, source);
  }
  const float energy_source = total_energy(source);

  source.CopyTo(&destination);
  const float energy_destination = total_energy(destination);

  // The destination energy is scaled by Rate1() / Rate2() before comparing,
  // since the two buffers run at different rates.
  EXPECT_NEAR(energy_source, energy_destination * Rate1() / Rate2(),
              .04f * energy_source);
}
// Checks that AudioBuffer::view() exposes the same channels, frame count and
// sample values as AudioBuffer::channels().
TEST_P(AudioBufferChannelCountAndOneRateTest, DeinterleavedView) {
  AudioBuffer ab(Rate(), NumChannels(), Rate(), NumChannels(), Rate(),
                 NumChannels());

  // Put non-trivial content into every channel.
  for (size_t ch = 0; ch < ab.num_channels(); ++ch) {
    FillChannelWith100HzSine(ch, 1.0f, ab);
  }

  DeinterleavedView<float> view = ab.view();
  ASSERT_EQ(view.num_channels(), ab.num_channels());

  float* const* raw_channels = ab.channels();
  for (size_t ch = 0; ch < view.num_channels(); ++ch) {
    MonoView<float> mono = view[ch];
    EXPECT_EQ(SamplesPerChannel(mono), ab.num_frames());
    const size_t num_samples = SamplesPerChannel(mono);
    for (size_t i = 0; i < num_samples; ++i) {
      // Stop at the first mismatch rather than reporting every sample.
      ASSERT_EQ(mono[i], raw_channels[ch][i]);
    }
  }
}
// Feeds one mono float frame (deinterleaved interface) into a mono
// AudioBuffer and checks that the buffer holds a single channel whose energy
// matches the input energy to within 4%.
TEST_P(AudioBufferMonoInputDownmixingTest, MonoCaptureStacked) {
  const auto downmix_method = DownmixingMethod();
  const int frames_per_channel = Rate1() / 100;
  std::vector<float> samples(frames_per_channel);
  std::array<float*, 1> channel_ptrs = {samples.data()};
  AudioBuffer ab(Rate1(), 1, Rate2(), 1, Rate2(), downmix_method);

  // Put a sine in the input and compute its energy. The float samples are
  // scaled by 32768 to compensate for the internal S16 format in AudioBuffer.
  FillChannelWith100HzSine(Rate1(), 0, 0.7f, channel_ptrs.data());
  float input_energy = 0.0f;
  for (int i = 0; i < frames_per_channel; ++i) {
    input_energy +=
        channel_ptrs[0][i] * 32768.0f * channel_ptrs[0][i] * 32768.0f;
  }

  // Copy to the audio buffer.
  StreamConfig stream_config(Rate1(), 1);
  ab.CopyFrom(channel_ptrs.data(), stream_config);

  // The buffer must stay mono.
  EXPECT_EQ(ab.num_channels(), 1u);

  // Energy of the buffer content.
  float buffer_energy = 0.0f;
  const float* const buffered = ab.channels()[0];
  for (size_t i = 0; i < ab.num_frames(); ++i) {
    buffer_energy += buffered[i] * buffered[i];
  }

  // Energies must match once the buffer energy is scaled by Rate1()/Rate2().
  EXPECT_NEAR(input_energy, buffer_energy * Rate1() / Rate2(),
              .04f * input_energy);
}
// Feeds one mono int16 frame (interleaved interface) into a mono AudioBuffer
// and checks that the buffer holds a single channel whose energy matches the
// input energy to within 4%.
TEST_P(AudioBufferMonoInputDownmixingTest, MonoCaptureInterleaved) {
  const AudioProcessing::Config::Pipeline::DownmixMethod downmixing_method =
      DownmixingMethod();
  const int num_frames_per_channel = Rate1() / 100;
  std::vector<int16_t> audio(num_frames_per_channel);
  AudioBuffer ab(Rate1(), 1, Rate2(), 1, Rate2(), downmixing_method);

  // The int16 fill helper truncates `amplitude * sin(...)` to an integer, so
  // the amplitude has to be given in int16 units. Passing 0.7f directly would
  // produce an all-zero frame and make every check below compare 0 with 0.
  constexpr float kAmplitudeS16 = 0.7f * 32767.0f;
  FillChannelWith100HzSine(Rate1(), 1, 0, kAmplitudeS16, audio.data());

  // The input is already int16, so its energy is used without the extra
  // 32768 scaling that float input needs. This matches how the int16 branch
  // of the StereoCapture test computes its input energies.
  float energy_input = 0.0f;
  for (const int16_t sample : audio) {
    const float value = sample;
    energy_input += value * value;
  }
  // Guard against the test silently degenerating into a comparison of zeros.
  ASSERT_GT(energy_input, 0.0f);

  // Copy to audio buffer.
  StreamConfig stream_config(Rate1(), 1);
  ab.CopyFrom(audio.data(), stream_config);

  // Verify that the channel count is correct.
  EXPECT_EQ(ab.num_channels(), 1u);

  // Compute energy of audio buffer.
  float energy_ab = 0.0f;
  for (size_t i = 0; i < ab.num_frames(); ++i) {
    energy_ab += ab.channels()[0][i] * ab.channels()[0][i];
  }

  // Verify that energies match once the buffer energy is scaled by
  // Rate1()/Rate2().
  EXPECT_NEAR(energy_input, energy_ab * Rate1() / Rate2(), .04f * energy_input);
}
// Repeatedly feeds the same stereo 100 Hz sine frame into an AudioBuffer with
// NumChannels() channels and checks the energy of the buffer channel(s)
// against the energy expected for the configured downmix method.
//
// The input goes through either the float (deinterleaved) or the int16
// (interleaved) CopyFrom interface. It is fed 10 times when
// OnlyInitialFrames() is set and 250 times otherwise; only the buffer content
// after the last CopyFrom is inspected.
TEST_P(AudioBufferStereoInputDownmixingTest, StereoCapture) {
  using DownmixMethod = AudioProcessing::Config::Pipeline::DownmixMethod;
  const DownmixMethod downmixing_method = DownmixingMethod();
  const int num_frames_per_channel = Rate1() / 100;
  AudioBuffer ab(Rate1(), 2, Rate2(), NumChannels(), Rate2(),
                 downmixing_method);
  float energy_ab_ch0 = 0.0f;
  float energy_ab_ch1 = 0.0f;
  float energy_input_ch0 = 0.0f;
  float energy_input_ch1 = 0.0f;
  float energy_input_average = 0.0f;
  const int num_frames_to_process = OnlyInitialFrames() ? 10 : 250;

  // Zero-initialized so the amplitudes are well defined even if the switch
  // below does not match (e.g. after a new enumerator is added).
  float amplitude0 = 0.0f;
  float amplitude1 = 0.0f;
  switch (SignalVariant()) {
    case DownmixingSignalVariant::kInactive:
      amplitude0 = 0.0001f;
      amplitude1 = 0.0002f;
      break;
    case DownmixingSignalVariant::kChannel0Inactive:
      amplitude0 = 0.0001f;
      amplitude1 = 0.8f;
      break;
    case DownmixingSignalVariant::kVeryImbalanced:
      amplitude0 = 0.01f;
      amplitude1 = 0.8f;
      break;
    case DownmixingSignalVariant::kBalanced:
      amplitude0 = 0.7f;
      amplitude1 = 0.8f;
      break;
  }

  if (UseFloatInterface()) {
    // Both channels live back to back in one allocation.
    std::vector<float> audio_data(2 * num_frames_per_channel);
    std::array<float*, 2> audio = {&audio_data[0],
                                   &audio_data[num_frames_per_channel]};
    // Put a sine in each channel and compute the input energies. The float
    // samples are scaled by 32768 to compensate for the internal S16 format
    // in AudioBuffer.
    FillChannelWith100HzSine(Rate1(), 0, amplitude0, audio.data());
    FillChannelWith100HzSine(Rate1(), 1, amplitude1, audio.data());
    for (int i = 0; i < num_frames_per_channel; ++i) {
      energy_input_ch0 += (audio[0][i] * audio[0][i]) * 32768.0f * 32768.0f;
      energy_input_ch1 += (audio[1][i] * audio[1][i]) * 32768.0f * 32768.0f;
      float average = (audio[0][i] + audio[1][i]) * 32768.0f * 0.5f;
      energy_input_average += average * average;
    }
    // Copy to audio buffer.
    StreamConfig stream_config(Rate1(), 2);
    for (int k = 0; k < num_frames_to_process; ++k) {
      ab.CopyFrom(audio.data(), stream_config);
      // Verify that the channel count is correct.
      EXPECT_EQ(ab.num_channels(), NumChannels());
    }
  } else {
    // Interleaved layout: sample i of channel c is at index 2 * i + c.
    std::vector<int16_t> audio(2 * num_frames_per_channel);
    // NOTE(review): the int16 fill helper truncates `amplitude * sin(...)`
    // toward zero and every amplitude used here is below 1, so `audio`
    // appears to be all zeros in this branch and the energy checks below
    // compare zeros. Confirm whether the amplitudes should be scaled to the
    // int16 range here.
    FillChannelWith100HzSine(Rate1(), 2, 0, amplitude0, audio.data());
    FillChannelWith100HzSine(Rate1(), 2, 1, amplitude1, audio.data());
    // int16 input is used as-is; no 32768 scaling is applied.
    for (int i = 0; i < num_frames_per_channel; ++i) {
      energy_input_ch0 += (audio[2 * i] * audio[2 * i]);
      energy_input_ch1 += (audio[2 * i + 1] * audio[2 * i + 1]);
      float average = (audio[2 * i] + audio[2 * i + 1]) * 0.5f;
      energy_input_average += average * average;
    }
    // Copy to audio buffer.
    StreamConfig stream_config(Rate1(), 2);
    for (int k = 0; k < num_frames_to_process; ++k) {
      ab.CopyFrom(audio.data(), stream_config);
      // Verify that the channel count is correct.
      EXPECT_EQ(ab.num_channels(), NumChannels());
    }
  }

  // Compute energy of audio buffer.
  for (size_t i = 0; i < ab.num_frames(); ++i) {
    energy_ab_ch0 += ab.channels()[0][i] * ab.channels()[0][i];
  }
  if (ab.num_channels() == 2) {
    for (size_t i = 0; i < ab.num_frames(); ++i) {
      energy_ab_ch1 += ab.channels()[1][i] * ab.channels()[1][i];
    }
  }

  // Buffer energies scaled by Rate1()/Rate2() so they can be compared with
  // the input energies, which were computed over Rate1()/100 frames.
  const float scaled_energy_ab_ch0 = energy_ab_ch0 * Rate1() / Rate2();
  const float scaled_energy_ab_ch1 = energy_ab_ch1 * Rate1() / Rate2();

  // Averaging downmix: a mono buffer must hold the channel average, a stereo
  // buffer must hold each input channel unchanged.
  if (downmixing_method == DownmixMethod::kAverageChannels) {
    if (NumChannels() == 1) {
      EXPECT_NEAR(energy_input_average, scaled_energy_ab_ch0,
                  .04f * energy_input_average);
    } else {
      EXPECT_NEAR(energy_input_ch0, scaled_energy_ab_ch0,
                  .04f * energy_input_average);
      EXPECT_NEAR(energy_input_ch1, scaled_energy_ab_ch1,
                  .04f * energy_input_average);
    }
    return;
  }

  ASSERT_EQ(downmixing_method, DownmixMethod::kAdaptive);

  // With only the initial frames processed, the expectation is the channel
  // average regardless of the signal variant.
  if (OnlyInitialFrames()) {
    EXPECT_NEAR(energy_input_average, scaled_energy_ab_ch0,
                .04f * energy_input_average);
    if (NumChannels() == 2) {
      EXPECT_NEAR(energy_input_average, scaled_energy_ab_ch1,
                  .04f * energy_input_average);
    }
    return;
  }

  switch (SignalVariant()) {
    // Both variants expect the channel average in every buffer channel.
    case DownmixingSignalVariant::kInactive:
    case DownmixingSignalVariant::kChannel0Inactive:
      EXPECT_NEAR(energy_input_average, scaled_energy_ab_ch0,
                  .04f * energy_input_average);
      if (NumChannels() == 2) {
        EXPECT_NEAR(energy_input_average, scaled_energy_ab_ch1,
                    .04f * energy_input_average);
      }
      break;
    // Expect input channel 1 in every buffer channel.
    case DownmixingSignalVariant::kVeryImbalanced:
      EXPECT_NEAR(energy_input_ch1, scaled_energy_ab_ch0,
                  .04f * energy_input_ch1);
      if (NumChannels() == 2) {
        EXPECT_NEAR(energy_input_ch1, scaled_energy_ab_ch1,
                    .04f * energy_input_ch1);
      }
      break;
    // Expect each buffer channel to carry the matching input channel.
    case DownmixingSignalVariant::kBalanced:
      EXPECT_NEAR(energy_input_ch0, scaled_energy_ab_ch0,
                  .04f * energy_input_ch0);
      if (NumChannels() == 2) {
        // Tolerance is relative to the channel being compared (channel 1).
        EXPECT_NEAR(energy_input_ch1, scaled_energy_ab_ch1,
                    .04f * energy_input_ch1);
      }
      break;
  }
}
} // namespace webrtc