blob: 80c2b995759d802c44d7ba758b0c6abafad18b1b [file] [log] [blame]
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_mixer/frame_combiner.h"
#include <cstdint>
#include <initializer_list>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/rtp_packet_info.h"
#include "api/rtp_packet_infos.h"
#include "api/units/timestamp.h"
#include "audio/utility/audio_frame_operations.h"
#include "modules/audio_mixer/gain_change_calculator.h"
#include "modules/audio_mixer/sine_wave_generator.h"
#include "rtc_base/checks.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
using ::testing::ElementsAreArray;
using ::testing::IsEmpty;
using ::testing::UnorderedElementsAreArray;
struct FrameCombinerConfig {
bool use_limiter;
int sample_rate_hz;
int number_of_channels;
float wave_frequency;
};
std::string ProduceDebugText(int sample_rate_hz,
int number_of_channels,
int number_of_sources) {
rtc::StringBuilder ss;
ss << "Sample rate: " << sample_rate_hz << " ,";
ss << "number of channels: " << number_of_channels << " ,";
ss << "number of sources: " << number_of_sources;
return ss.Release();
}
std::string ProduceDebugText(const FrameCombinerConfig& config) {
rtc::StringBuilder ss;
ss << "Sample rate: " << config.sample_rate_hz << " ,";
ss << "number of channels: " << config.number_of_channels << " ,";
ss << "limiter active: " << (config.use_limiter ? "on" : "off") << " ,";
ss << "wave frequency: " << config.wave_frequency << " ,";
return ss.Release();
}
AudioFrame frame1;
AudioFrame frame2;
void SetUpFrames(int sample_rate_hz, int number_of_channels) {
RtpPacketInfo packet_info1(/*ssrc=*/1001, /*csrcs=*/{},
/*rtp_timestamp=*/1000,
/*receive_time=*/Timestamp::Millis(1));
RtpPacketInfo packet_info2(/*ssrc=*/4004, /*csrcs=*/{},
/*rtp_timestamp=*/1234,
/*receive_time=*/Timestamp::Millis(2));
RtpPacketInfo packet_info3(/*ssrc=*/7007, /*csrcs=*/{},
/*rtp_timestamp=*/1333,
/*receive_time=*/Timestamp::Millis(2));
frame1.packet_infos_ = RtpPacketInfos({packet_info1});
frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3});
for (auto* frame : {&frame1, &frame2}) {
frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100),
sample_rate_hz, AudioFrame::kNormalSpeech,
AudioFrame::kVadActive, number_of_channels);
}
}
} // namespace
// The limiter requires sample rate divisible by 2000.
TEST(FrameCombiner, BasicApiCallsLimiter) {
FrameCombiner combiner(true);
for (const int rate : {8000, 18000, 34000, 48000}) {
for (const int number_of_channels : {1, 2, 4, 8}) {
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
SetUpFrames(rate, number_of_channels);
for (const int number_of_frames : {0, 1, 2}) {
SCOPED_TRACE(
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
AudioFrame audio_frame_for_mixing;
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing);
}
}
}
}
// The RtpPacketInfos field of the mixed packet should contain the union of the
// RtpPacketInfos from the frames that were actually mixed.
TEST(FrameCombiner, ContainsAllRtpPacketInfos) {
static constexpr int kSampleRateHz = 48000;
static constexpr int kNumChannels = 1;
FrameCombiner combiner(true);
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
SetUpFrames(kSampleRateHz, kNumChannels);
for (const int number_of_frames : {0, 1, 2}) {
SCOPED_TRACE(
ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
std::vector<RtpPacketInfo> packet_infos;
for (const auto& frame : frames_to_combine) {
packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
frame->packet_infos_.end());
}
AudioFrame audio_frame_for_mixing;
combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz,
frames_to_combine.size(), &audio_frame_for_mixing);
EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
UnorderedElementsAreArray(packet_infos));
}
}
#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
// There are CHECKs in place to check for invalid parameters.
TEST(FrameCombinerDeathTest, BuildCrashesWithManyChannels) {
FrameCombiner combiner(true);
for (const int rate : {8000, 18000, 34000, 48000}) {
for (const int number_of_channels : {10, 20, 21}) {
if (static_cast<size_t>(rate / 100 * number_of_channels) >
AudioFrame::kMaxDataSizeSamples) {
continue;
}
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
// With an unsupported channel count, this will crash in
// `AudioFrame::UpdateFrame`.
EXPECT_DEATH(SetUpFrames(rate, number_of_channels), "");
const int number_of_frames = 2;
SCOPED_TRACE(
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
AudioFrame audio_frame_for_mixing;
EXPECT_DEATH(
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing),
"");
}
}
}
#endif // GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
FrameCombiner combiner(true);
for (const int rate : {50000, 96000, 128000, 196000}) {
for (const int number_of_channels : {1, 2, 3}) {
if (static_cast<size_t>(rate / 100 * number_of_channels) >
AudioFrame::kMaxDataSizeSamples) {
continue;
}
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
SetUpFrames(rate, number_of_channels);
const int number_of_frames = 2;
SCOPED_TRACE(
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
AudioFrame audio_frame_for_mixing;
#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
EXPECT_DEATH(
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing),
"")
<< "number_of_channels=" << number_of_channels << ", rate=" << rate
<< ", frames to combine=" << frames_to_combine.size();
#endif
}
}
}
// With no limiter, the rate has to be divisible by 100 since we use
// 10 ms frames.
TEST(FrameCombiner, BasicApiCallsNoLimiter) {
FrameCombiner combiner(false);
for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
for (const int number_of_channels : {1, 2, 4, 8}) {
const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
SetUpFrames(rate, number_of_channels);
for (const int number_of_frames : {0, 1, 2}) {
SCOPED_TRACE(
ProduceDebugText(rate, number_of_channels, number_of_frames));
const std::vector<AudioFrame*> frames_to_combine(
all_frames.begin(), all_frames.begin() + number_of_frames);
AudioFrame audio_frame_for_mixing;
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing);
}
}
}
}
TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
FrameCombiner combiner(false);
for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
for (const int number_of_channels : {1, 2}) {
SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0));
AudioFrame audio_frame_for_mixing;
const std::vector<AudioFrame*> frames_to_combine;
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing);
const int16_t* audio_frame_for_mixing_data =
audio_frame_for_mixing.data();
const std::vector<int16_t> mixed_data(
audio_frame_for_mixing_data,
audio_frame_for_mixing_data + number_of_channels * rate / 100);
const std::vector<int16_t> expected(number_of_channels * rate / 100, 0);
EXPECT_EQ(mixed_data, expected);
EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty());
}
}
}
TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
FrameCombiner combiner(false);
for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
// kMaxConcurrentChannels is 8.
for (const int number_of_channels : {1, 2, 4, kMaxConcurrentChannels}) {
SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));
AudioFrame audio_frame_for_mixing;
SetUpFrames(rate, number_of_channels);
int16_t* frame1_data = frame1.mutable_data();
std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0);
const std::vector<AudioFrame*> frames_to_combine = {&frame1};
combiner.Combine(frames_to_combine, number_of_channels, rate,
frames_to_combine.size(), &audio_frame_for_mixing);
const int16_t* audio_frame_for_mixing_data =
audio_frame_for_mixing.data();
const std::vector<int16_t> mixed_data(
audio_frame_for_mixing_data,
audio_frame_for_mixing_data + number_of_channels * rate / 100);
std::vector<int16_t> expected(number_of_channels * rate / 100);
std::iota(expected.begin(), expected.end(), 0);
EXPECT_EQ(mixed_data, expected);
EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
ElementsAreArray(frame1.packet_infos_));
}
}
}
// Send a sine wave through the FrameCombiner, and check that the
// difference between input and output varies smoothly. Also check
// that it is inside reasonable bounds. This is to catch issues like
// chromium:695993 and chromium:816875.
TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
// Rates are divisible by 2000 when limiter is active.
std::vector<FrameCombinerConfig> configs = {
{false, 30100, 2, 50.f}, {false, 16500, 1, 3200.f},
{true, 8000, 1, 3200.f}, {true, 16000, 1, 50.f},
{true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
};
for (const auto& config : configs) {
SCOPED_TRACE(ProduceDebugText(config));
FrameCombiner combiner(config.use_limiter);
constexpr int16_t wave_amplitude = 30000;
SineWaveGenerator wave_generator(config.wave_frequency, wave_amplitude);
GainChangeCalculator change_calculator;
float cumulative_change = 0.f;
constexpr size_t iterations = 100;
for (size_t i = 0; i < iterations; ++i) {
SetUpFrames(config.sample_rate_hz, config.number_of_channels);
wave_generator.GenerateNextFrame(&frame1);
AudioFrameOperations::Mute(&frame2);
std::vector<AudioFrame*> frames_to_combine = {&frame1};
if (i % 2 == 0) {
frames_to_combine.push_back(&frame2);
}
const size_t number_of_samples =
frame1.samples_per_channel_ * config.number_of_channels;
// Ensures limiter is on if 'use_limiter'.
constexpr size_t number_of_streams = 2;
AudioFrame audio_frame_for_mixing;
combiner.Combine(frames_to_combine, config.number_of_channels,
config.sample_rate_hz, number_of_streams,
&audio_frame_for_mixing);
cumulative_change += change_calculator.CalculateGainChange(
rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples),
rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(),
number_of_samples));
}
// Check that the gain doesn't vary too much.
EXPECT_LT(cumulative_change, 10);
// Check that the latest gain is within reasonable bounds. It
// should be slightly less that 1.
EXPECT_LT(0.9f, change_calculator.LatestGain());
EXPECT_LT(change_calculator.LatestGain(), 1.01f);
}
}
} // namespace webrtc