blob: 3b6e942d887695e817f107f26624833ac2a564b5 [file] [log] [blame]
/*
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
#include "system_wrappers/include/metrics.h"
#include "test/gtest.h"
namespace webrtc {
// A detector configured for a single render channel reports no proper
// multichannel content right after construction, even when stereo detection
// is enabled.
TEST(MultiChannelContentDetector, HandlingOfMono) {
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/true,
      /*num_render_input_channels=*/1,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  const bool proper_multichannel_detected =
      detector.IsProperMultiChannelContentDetected();
  EXPECT_FALSE(proper_multichannel_detected);
}
// A detector configured for a single render channel reports no proper
// multichannel content right after construction when stereo detection is
// disabled.
TEST(MultiChannelContentDetector, HandlingOfMonoAndDetectionOff) {
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/false,
      /*num_render_input_channels=*/1,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  const bool proper_multichannel_detected =
      detector.IsProperMultiChannelContentDetected();
  EXPECT_FALSE(proper_multichannel_detected);
}
// With stereo detection disabled and two render channels, proper multichannel
// content is reported from construction onwards and UpdateDetection() keeps
// returning false, even for a frame whose two channels differ.
TEST(MultiChannelContentDetector, HandlingOfDetectionOff) {
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/false,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);
  EXPECT_TRUE(detector.IsProperMultiChannelContentDetected());

  // frame[0] holds two 160-sample vectors with different constant values.
  std::vector<std::vector<std::vector<float>>> differing_channels_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};

  EXPECT_FALSE(detector.UpdateDetection(differing_channels_frame));
  EXPECT_TRUE(detector.IsProperMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(differing_channels_frame));
}
// With stereo detection enabled and two render channels, no proper
// multichannel content is reported before any frame has been analyzed.
TEST(MultiChannelContentDetector, InitialDetectionOfStereo) {
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/true,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  const bool proper_multichannel_detected =
      detector.IsProperMultiChannelContentDetected();
  EXPECT_FALSE(proper_multichannel_detected);
}
// A two-channel frame whose channels are identical ("fake stereo") does not
// trigger detection of proper multichannel content.
TEST(MultiChannelContentDetector, DetectionWhenFakeStereo) {
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/true,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  // frame[0] holds two 160-sample vectors with the same constant value.
  std::vector<std::vector<std::vector<float>>> identical_channels_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};

  EXPECT_FALSE(detector.UpdateDetection(identical_channels_frame));
  EXPECT_FALSE(detector.IsProperMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(identical_channels_frame));
}
// A two-channel frame whose channels differ is detected immediately when
// neither a threshold nor hysteresis is configured: the first update returns
// true, the second update with the same frame returns false.
TEST(MultiChannelContentDetector, DetectionWhenStereo) {
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/true,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  // frame[0] holds two 160-sample vectors with different constant values.
  std::vector<std::vector<std::vector<float>>> differing_channels_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};

  EXPECT_TRUE(detector.UpdateDetection(differing_channels_frame));
  EXPECT_TRUE(detector.IsProperMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(differing_channels_frame));
}
// Frames with identical channels are not detected as proper multichannel
// content, but a subsequent frame with differing channels is.
TEST(MultiChannelContentDetector, DetectionWhenStereoAfterAWhile) {
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/true,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  std::vector<std::vector<std::vector<float>>> identical_channels_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};
  std::vector<std::vector<std::vector<float>>> differing_channels_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};

  // Phase 1: identical channels, nothing is detected.
  EXPECT_FALSE(detector.UpdateDetection(identical_channels_frame));
  EXPECT_FALSE(detector.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(detector.UpdateDetection(identical_channels_frame));

  // Phase 2: differing channels, detection is reported on the first such
  // frame only.
  EXPECT_TRUE(detector.UpdateDetection(differing_channels_frame));
  EXPECT_TRUE(detector.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(detector.UpdateDetection(differing_channels_frame));
}
// A channel difference that is exactly equal to the detection threshold does
// not trigger detection of proper multichannel content.
TEST(MultiChannelContentDetector, DetectionWithStereoBelowThreshold) {
  constexpr float kThreshold = 1.0f;
  constexpr float kFirstChannelValue = 100.0f;
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/true,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/kThreshold,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  // The second vector differs from the first by exactly kThreshold.
  std::vector<std::vector<std::vector<float>>> frame = {
      {std::vector<float>(160, kFirstChannelValue),
       std::vector<float>(160, kFirstChannelValue + kThreshold)}};

  EXPECT_FALSE(detector.UpdateDetection(frame));
  EXPECT_FALSE(detector.IsProperMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
// A channel difference that exceeds the detection threshold triggers detection
// of proper multichannel content on the first frame.
TEST(MultiChannelContentDetector, DetectionWithStereoAboveThreshold) {
  constexpr float kThreshold = 1.0f;
  constexpr float kFirstChannelValue = 100.0f;
  MultiChannelContentDetector detector(
      /*detect_stereo_content=*/true,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/kThreshold,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  // The second vector differs from the first by slightly more than kThreshold.
  std::vector<std::vector<std::vector<float>>> frame = {
      {std::vector<float>(160, kFirstChannelValue),
       std::vector<float>(160, kFirstChannelValue + kThreshold + 0.1f)}};

  EXPECT_TRUE(detector.UpdateDetection(frame));
  EXPECT_TRUE(detector.IsProperMultiChannelContentDetected());

  EXPECT_FALSE(detector.UpdateDetection(frame));
}
class MultiChannelContentDetectorTimeoutBehavior
: public ::testing::Test,
public ::testing::WithParamInterface<std::tuple<bool, int>> {};
// Runs the timeout tests with stereo detection off and on, each combined with
// timeout thresholds of 0, 1 and 10 seconds.
INSTANTIATE_TEST_SUITE_P(MultiChannelContentDetector,
                         MultiChannelContentDetectorTimeoutBehavior,
                         ::testing::Combine(::testing::Bool(),
                                            ::testing::Values(0, 1, 10)));
// Verifies that, after true stereo has been detected, a sufficiently long run
// of fake stereo frames makes the detector fall back to reporting no proper
// multichannel content, but only when stereo detection is enabled and a
// non-zero timeout is configured.
TEST_P(MultiChannelContentDetectorTimeoutBehavior,
       TimeOutBehaviorForNonTrueStereo) {
  constexpr int kNumFramesPerSecond = 100;
  constexpr int kNumWarmupOrTrailingFrames = 10;
  const bool detect_stereo_content = std::get<0>(GetParam());
  const int timeout_seconds = std::get<1>(GetParam());
  const int timeout_frames = timeout_seconds * kNumFramesPerSecond;
  // The test expects a timeout only with detection on and a non-zero
  // threshold.
  const bool expect_timeout = detect_stereo_content && timeout_frames > 0;

  MultiChannelContentDetector detector(
      detect_stereo_content,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f, timeout_seconds,
      /*stereo_detection_hysteresis_seconds=*/0.0f);

  std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
  std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};

  // Pass fake stereo frames and verify the content detection. With detection
  // disabled, proper multichannel content is reported from the start.
  for (int i = 0; i < kNumWarmupOrTrailingFrames; ++i) {
    EXPECT_FALSE(detector.UpdateDetection(fake_stereo_frame));
    EXPECT_EQ(!detect_stereo_content,
              detector.IsProperMultiChannelContentDetected());
  }

  // Pass a true stereo frame and verify that it is properly detected. The
  // update only returns true when detection is enabled.
  EXPECT_EQ(detect_stereo_content,
            detector.UpdateDetection(true_stereo_frame));
  EXPECT_TRUE(detector.IsProperMultiChannelContentDetected());

  // Pass fake stereo frames until any timeouts are about to occur.
  for (int i = 0; i < timeout_frames - 1; ++i) {
    EXPECT_FALSE(detector.UpdateDetection(fake_stereo_frame));
    EXPECT_TRUE(detector.IsProperMultiChannelContentDetected());
  }

  // Pass a fake stereo frame and verify that any timeouts properly occur.
  EXPECT_EQ(expect_timeout, detector.UpdateDetection(fake_stereo_frame));
  EXPECT_EQ(!expect_timeout, detector.IsProperMultiChannelContentDetected());

  // Pass fake stereo frames and verify the behavior after any timeout.
  for (int i = 0; i < kNumWarmupOrTrailingFrames; ++i) {
    EXPECT_FALSE(detector.UpdateDetection(fake_stereo_frame));
    EXPECT_EQ(!expect_timeout, detector.IsProperMultiChannelContentDetected());
  }
}
class MultiChannelContentDetectorHysteresisBehavior
: public ::testing::Test,
public ::testing::WithParamInterface<std::tuple<bool, float>> {};
// Runs the hysteresis tests with stereo detection off and on, each combined
// with hysteresis periods of 0, 0.1 and 0.2 seconds.
INSTANTIATE_TEST_SUITE_P(MultiChannelContentDetector,
                         MultiChannelContentDetectorHysteresisBehavior,
                         ::testing::Combine(::testing::Bool(),
                                            ::testing::Values(0.0f,
                                                              0.1f,
                                                              0.2f)));
// Verifies how long true stereo content must persist before it is reported as
// proper multichannel content, for each (detect_stereo_content,
// stereo_detection_hysteresis_seconds) parameter combination. While the
// hysteresis period is running, the content is expected to be reported as
// temporary multichannel content instead.
//
// NOTE(review): the hysteresis parameter used to be read into an `int`, which
// truncated 0.1f and 0.2f to 0, so everything after the early return below was
// never executed. Those expectations assume that detection flips on the frame
// at which the number of consecutive true stereo frames equals
// `stereo_detection_hysteresis_frames`; confirm this against the detector
// implementation.
TEST_P(MultiChannelContentDetectorHysteresisBehavior,
       PeriodBeforeStereoDetectionIsTriggered) {
  constexpr int kNumFramesPerSecond = 100;
  const bool detect_stereo_content = std::get<0>(GetParam());
  // Keep the parameter as a float so that fractional periods reach both the
  // detector and the frame-count computation unchanged.
  const float stereo_detection_hysteresis_seconds = std::get<1>(GetParam());
  const int stereo_detection_hysteresis_frames = static_cast<int>(
      stereo_detection_hysteresis_seconds * kNumFramesPerSecond);
  const bool hysteresis_enabled = stereo_detection_hysteresis_frames > 0;

  MultiChannelContentDetector mc(
      detect_stereo_content,
      /*num_render_input_channels=*/2,
      /*detection_threshold=*/0.0f,
      /*stereo_detection_timeout_threshold_seconds=*/0,
      stereo_detection_hysteresis_seconds);

  std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
  std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};

  // Pass fake stereo frames and verify the content detection.
  for (int k = 0; k < 10; ++k) {
    EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
    EXPECT_EQ(!detect_stereo_content, mc.IsProperMultiChannelContentDetected());
    EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());
  }

  // Pass two true stereo frames and verify that they are properly detected.
  // The frame accounting below requires either no hysteresis or a period that
  // is longer than these two frames.
  ASSERT_TRUE(stereo_detection_hysteresis_frames > 2 ||
              stereo_detection_hysteresis_frames == 0);
  for (int k = 0; k < 2; ++k) {
    if (!detect_stereo_content) {
      EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
      EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());
    } else if (!hysteresis_enabled) {
      // Without hysteresis the very first true stereo frame flips the state.
      EXPECT_EQ(k == 0, mc.UpdateDetection(true_stereo_frame));
      EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
      EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());
    } else {
      // Within the hysteresis period only temporary content is reported.
      EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
      EXPECT_FALSE(mc.IsProperMultiChannelContentDetected());
      EXPECT_TRUE(mc.IsTemporaryMultiChannelContentDetected());
    }
  }

  // The remaining checks only apply when a hysteresis period is configured.
  if (!hysteresis_enabled) {
    return;
  }

  // Pass true stereo frames until any timeouts are about to occur. Together
  // with the two frames above this amounts to
  // `stereo_detection_hysteresis_frames - 1` consecutive true stereo frames.
  for (int k = 0; k < stereo_detection_hysteresis_frames - 3; ++k) {
    EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
    EXPECT_EQ(!detect_stereo_content, mc.IsProperMultiChannelContentDetected());
    EXPECT_EQ(detect_stereo_content,
              mc.IsTemporaryMultiChannelContentDetected());
  }

  // Pass a true stereo frame and verify that it is properly detected.
  EXPECT_EQ(detect_stereo_content, mc.UpdateDetection(true_stereo_frame));
  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());

  // Pass an additional true stereo frame and verify that it is properly
  // detected.
  EXPECT_FALSE(mc.UpdateDetection(true_stereo_frame));
  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());

  // Pass a fake stereo frame and verify that it is properly detected.
  EXPECT_FALSE(mc.UpdateDetection(fake_stereo_frame));
  EXPECT_TRUE(mc.IsProperMultiChannelContentDetected());
  EXPECT_FALSE(mc.IsTemporaryMultiChannelContentDetected());
}
class MultiChannelContentDetectorMetricsDisabled
: public ::testing::Test,
public ::testing::WithParamInterface<std::tuple<bool, int>> {};
// Two configurations: detection disabled with two channels, and detection
// enabled with a single channel.
INSTANTIATE_TEST_SUITE_P(
    /*no prefix*/,
    MultiChannelContentDetectorMetricsDisabled,
    ::testing::Values(std::make_tuple(false, 2), std::make_tuple(true, 1)));
// Test that no metrics are logged when they are clearly uninteresting and would
// dilute relevant data: when the reference audio is single channel, or when
// dynamic detection is disabled.
TEST_P(MultiChannelContentDetectorMetricsDisabled, ReportsNoMetrics) {
  metrics::Reset();

  constexpr int kNumFramesPerSecond = 100;
  constexpr int kNumFramesToProcess = 20 * kNumFramesPerSecond;
  const bool detect_stereo_content = std::get<0>(GetParam());
  const int channel_count = std::get<1>(GetParam());

  // A single outer entry holding `channel_count` identical 160-sample vectors.
  std::vector<std::vector<std::vector<float>>> audio_frame(
      1, std::vector<std::vector<float>>(channel_count,
                                         std::vector<float>(160, 100.0f)));

  // The detector lives in its own scope so that it is destroyed before the
  // metrics are inspected.
  {
    MultiChannelContentDetector detector(
        /*detect_stereo_content=*/detect_stereo_content,
        /*num_render_input_channels=*/channel_count,
        /*detection_threshold=*/0.0f,
        /*stereo_detection_timeout_threshold_seconds=*/1,
        /*stereo_detection_hysteresis_seconds=*/0.0f);
    for (int frame_index = 0; frame_index < kNumFramesToProcess;
         ++frame_index) {
      detector.UpdateDetection(audio_frame);
    }
  }

  const int num_periodic_samples =
      metrics::NumSamples("WebRTC.Audio.EchoCanceller."
                          "ProcessingPersistentMultichannelContent");
  EXPECT_METRIC_EQ(0, num_periodic_samples);

  const int num_lifetime_samples =
      metrics::NumSamples("WebRTC.Audio.EchoCanceller."
                          "PersistentMultichannelContentEverDetected");
  EXPECT_METRIC_EQ(0, num_lifetime_samples);
}
// Tests that no metrics are reported for a detector that only processed 3
// seconds worth of frames before being destroyed.
TEST(MultiChannelContentDetectorMetrics, ReportsNoMetricsForShortLifetime) {
  metrics::Reset();

  constexpr int kNumFramesPerSecond = 100;
  constexpr int kTooFewFramesToLogMetrics = 3 * kNumFramesPerSecond;

  // A single outer entry holding two identical 160-sample vectors.
  std::vector<std::vector<std::vector<float>>> audio_frame(
      1, std::vector<std::vector<float>>(2, std::vector<float>(160, 100.0f)));

  // The detector lives in its own scope so that it is destroyed before the
  // metrics are inspected.
  {
    MultiChannelContentDetector detector(
        /*detect_stereo_content=*/true,
        /*num_render_input_channels=*/2,
        /*detection_threshold=*/0.0f,
        /*stereo_detection_timeout_threshold_seconds=*/1,
        /*stereo_detection_hysteresis_seconds=*/0.0f);
    for (int frame_index = 0; frame_index < kTooFewFramesToLogMetrics;
         ++frame_index) {
      detector.UpdateDetection(audio_frame);
    }
  }

  const int num_periodic_samples =
      metrics::NumSamples("WebRTC.Audio.EchoCanceller."
                          "ProcessingPersistentMultichannelContent");
  EXPECT_METRIC_EQ(0, num_periodic_samples);

  const int num_lifetime_samples =
      metrics::NumSamples("WebRTC.Audio.EchoCanceller."
                          "PersistentMultichannelContentEverDetected");
  EXPECT_METRIC_EQ(0, num_lifetime_samples);
}
// Tests that metrics are reported for a detector that processed 25 seconds
// worth of frames (10 seconds of true stereo followed by 15 seconds of fake
// stereo) before being destroyed.
TEST(MultiChannelContentDetectorMetrics, ReportsMetrics) {
  metrics::Reset();

  constexpr int kNumFramesPerSecond = 100;
  constexpr int kNumTrueStereoFrames = 10 * kNumFramesPerSecond;
  constexpr int kNumFakeStereoFrames = 15 * kNumFramesPerSecond;

  std::vector<std::vector<std::vector<float>>> true_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 101.0f)}};
  std::vector<std::vector<std::vector<float>>> fake_stereo_frame = {
      {std::vector<float>(160, 100.0f), std::vector<float>(160, 100.0f)}};

  // The detector lives in its own scope so that it is destroyed before the
  // metrics are inspected.
  {
    MultiChannelContentDetector detector(
        /*detect_stereo_content=*/true,
        /*num_render_input_channels=*/2,
        /*detection_threshold=*/0.0f,
        /*stereo_detection_timeout_threshold_seconds=*/1,
        /*stereo_detection_hysteresis_seconds=*/0.0f);
    for (int frame_index = 0; frame_index < kNumTrueStereoFrames;
         ++frame_index) {
      detector.UpdateDetection(true_stereo_frame);
    }
    for (int frame_index = 0; frame_index < kNumFakeStereoFrames;
         ++frame_index) {
      detector.UpdateDetection(fake_stereo_frame);
    }
  }

  // After 10 seconds of true stereo and the remainder fake stereo, we expect
  // one lifetime metric sample (multichannel detected) and two periodic samples
  // (one multichannel, one mono).

  // Check lifetime metric.
  const int num_lifetime_samples =
      metrics::NumSamples("WebRTC.Audio.EchoCanceller."
                          "PersistentMultichannelContentEverDetected");
  EXPECT_METRIC_EQ(1, num_lifetime_samples);
  const int num_lifetime_detected_events =
      metrics::NumEvents("WebRTC.Audio.EchoCanceller."
                         "PersistentMultichannelContentEverDetected",
                         1);
  EXPECT_METRIC_EQ(1, num_lifetime_detected_events);

  // Check periodic metric.
  const int num_periodic_samples =
      metrics::NumSamples("WebRTC.Audio.EchoCanceller."
                          "ProcessingPersistentMultichannelContent");
  EXPECT_METRIC_EQ(2, num_periodic_samples);
  const int num_periodic_mono_events =
      metrics::NumEvents("WebRTC.Audio.EchoCanceller."
                         "ProcessingPersistentMultichannelContent",
                         0);
  EXPECT_METRIC_EQ(1, num_periodic_mono_events);
  const int num_periodic_multichannel_events =
      metrics::NumEvents("WebRTC.Audio.EchoCanceller."
                         "ProcessingPersistentMultichannelContent",
                         1);
  EXPECT_METRIC_EQ(1, num_periodic_multichannel_events);
}
} // namespace webrtc