// blob: d490fb5b83d002f817a210c11a7b9fbe4a14a106 [file] [log] [blame]
/*
* Copyright (c) 2025 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/capture_mixer/remixing_logic.h"
#include <cstddef>
#include <optional>
#include "api/array_view.h"
#include "modules/audio_processing/capture_mixer/channel_content_remixer.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// A channel whose "frames since activity" count exceeds this value is treated
// as inactive by the helpers in this file.
constexpr int kInactivityThresholdFrames{100};
// Returns true when `mixing` is the single-channel variant that selects
// `channel`, i.e. channel 0 paired with kUseChannel0 or channel 1 paired with
// kUseChannel1. Every other combination yields false.
bool ChoiceOfChannelMatchesSingleChannelMixing(int channel,
                                               StereoMixingVariant mixing) {
  switch (channel) {
    case 0:
      return mixing == StereoMixingVariant::kUseChannel0;
    case 1:
      return mixing == StereoMixingVariant::kUseChannel1;
    default:
      return false;
  }
}
// Returns true unless both channels are inactive, i.e. unless both entries of
// `num_frames_since_activity` exceed kInactivityThresholdFrames.
bool EnoughContentForUpdatingMixing(
    ArrayView<const int, 2> num_frames_since_activity) {
  const auto still_active = [](int frames_since_activity) {
    return frames_since_activity <= kInactivityThresholdFrames;
  };
  return still_active(num_frames_since_activity[0]) ||
         still_active(num_frames_since_activity[1]);
}
// Returns true when one channel is inactive (its frames-since-activity count
// exceeds kInactivityThresholdFrames) and that channel's energy is both below
// an absolute threshold of 100 * 100 * `num_samples_per_channel` and more than
// a factor 100 below the energy of the other channel.
//
// Must not be called when both channels are inactive (checked in debug
// builds).
bool SingleSilentChannelDetected(
    size_t num_samples_per_channel,
    ArrayView<const float, 2> average_energies,
    ArrayView<const int, 2> num_frames_since_activity) {
  RTC_DCHECK(EnoughContentForUpdatingMixing(num_frames_since_activity));

  const bool idle[2] = {
      num_frames_since_activity[0] > kInactivityThresholdFrames,
      num_frames_since_activity[1] > kInactivityThresholdFrames};
  RTC_DCHECK(!(idle[0] && idle[1]));

  // With both channels active there is no silent-channel candidate.
  if (!idle[0] && !idle[1]) {
    return false;
  }

  // The inactive channel is the candidate; channel 0 is examined first.
  const size_t quiet = idle[0] ? 0 : 1;
  const size_t loud = 1 - quiet;

  constexpr float kRelativeEnergyThreshold = 100.0f;
  const float absolute_energy_threshold =
      100.0f * 100.0f * static_cast<float>(num_samples_per_channel);

  const float quiet_energy = average_energies[quiet];
  return quiet_energy < absolute_energy_threshold &&
         quiet_energy * kRelativeEnergyThreshold < average_energies[loud];
}
// Detects a large energy imbalance between the two channels. When the energy
// of one channel exceeds 50 times the energy of the other, the index of the
// channel with the higher energy is returned; otherwise std::nullopt.
std::optional<int> IdentifyLargelyImbalancedChannel(
    ArrayView<const float, 2> average_energies) {
  constexpr float kEnergyRatioThreshold = 50.0f;
  const float first = average_energies[0];
  const float second = average_energies[1];

  const bool first_dominates = first > kEnergyRatioThreshold * second;
  const bool second_dominates = second > kEnergyRatioThreshold * first;
  if (!first_dominates && !second_dominates) {
    return std::nullopt;
  }

  // Report whichever channel carries the most energy.
  if (first > second) {
    return 0;
  }
  return 1;
}
// Suggests a single channel to use when one channel has more than 4 times the
// energy of the other, is significantly saturated (saturation factor above
// 0.8), and the other channel is essentially unsaturated (factor below 0.1).
// In that case the index of the other, unsaturated channel is returned.
// Returns std::nullopt when no such preference exists, or when the energy
// imbalance is large enough to be reported by
// IdentifyLargelyImbalancedChannel().
std::optional<int> IdentifyModerateImbalancedAndSaturatedChannel(
    ArrayView<const float, 2> average_energies,
    ArrayView<const float, 2> saturation_factors) {
  constexpr float kEnergyRatioModerateThreshold = 4.0f;
  constexpr float kSignificantSaturationThreshold = 0.8f;
  constexpr float kNoSaturationThreshold = 0.1f;

  // Large energy imbalances are expected to be dealt with before this function
  // is called; if one is present, no channel is suggested here.
  if (IdentifyLargelyImbalancedChannel(average_energies)) {
    return std::nullopt;
  }

  // True when channel `loud` is moderately stronger than channel `other` and
  // saturated, while `other` shows no saturation.
  const auto other_is_preferable = [&](size_t loud, size_t other) {
    return average_energies[loud] >
               kEnergyRatioModerateThreshold * average_energies[other] &&
           saturation_factors[loud] > kSignificantSaturationThreshold &&
           saturation_factors[other] < kNoSaturationThreshold;
  };

  if (other_is_preferable(0, 1)) {
    return 1;
  }
  if (other_is_preferable(1, 0)) {
    return 0;
  }
  return std::nullopt;
}
} // namespace
// Constructs the remixing logic with default-constructed Settings by
// delegating to the two-argument constructor.
RemixingLogic::RemixingLogic(size_t num_samples_per_channel)
: RemixingLogic(num_samples_per_channel, Settings()) {}
// Constructs the remixing logic, storing a copy of `settings` and the number
// of samples per channel. The latter is passed on to the silent-channel
// detection, where it scales the absolute energy threshold.
RemixingLogic::RemixingLogic(size_t num_samples_per_channel,
const Settings& settings)
: settings_(settings), num_samples_per_channel_(num_samples_per_channel) {}
// Updates and returns the stereo mixing variant to use.
//
// The enabled handlers are consulted in order of precedence: silent channel,
// largely imbalanced channels, then moderately imbalanced and saturated
// channels. The first handler that reports being in effect determines the
// result. When none is in effect, both channels are used. If both channels
// have been inactive for too long, the previously selected mixing is returned
// unchanged.
StereoMixingVariant RemixingLogic::SelectStereoChannelMixing(
    ArrayView<const float, 2> average_energies,
    ArrayView<const int, 2> num_frames_since_activity,
    ArrayView<const float, 2> saturation_factors) {
  // Keep the current mixing while there is insufficient audio activity.
  if (!EnoughContentForUpdatingMixing(num_frames_since_activity)) {
    return mixing_;
  }

  // 1) Audio is active in only one of the channels.
  if (settings_.silent_channel_handling &&
      HandleAnySilentChannels(average_energies, num_frames_since_activity)) {
    RTC_DCHECK_EQ(mode_, Mode::kSilentChannel);
    RTC_DCHECK_EQ(mixing_, StereoMixingVariant::kUseAverage);
    return mixing_;
  }

  // 2) The energy content of the channels is very imbalanced.
  if (settings_.largely_imbalanced_handling &&
      HandleAnyLargelyImbalancedChannels(average_energies)) {
    RTC_DCHECK_EQ(mode_, Mode::kImbalancedChannels);
    RTC_DCHECK(mixing_ == StereoMixingVariant::kUseChannel0 ||
               mixing_ == StereoMixingVariant::kUseChannel1);
    return mixing_;
  }

  // 3) One channel is more saturated than the other while the energy content
  // of the channels is still fairly balanced.
  if (settings_.imbalanced_and_saturated_channel_handling &&
      HandleAnyImbalancedAndSaturatedChannels(average_energies,
                                              saturation_factors)) {
    RTC_DCHECK_EQ(mode_, Mode::kSaturatedChannel);
    RTC_DCHECK(mixing_ == StereoMixingVariant::kUseChannel0 ||
               mixing_ == StereoMixingVariant::kUseChannel1);
    return mixing_;
  }

  // No special handling is in effect: use both channels.
  RTC_DCHECK_EQ(mode_, Mode::kIdle);
  mixing_ = StereoMixingVariant::kUseBothChannels;
  return mixing_;
}
// Runs the silent-channel handling and returns true while that handling is in
// effect.
//
// Detecting a single silent channel enters (or stays in) the silent-channel
// mode, resets the frame counter and selects the average of the channels as
// mixing. When the mode is active but no silent channel is detected, the mode
// is kept for a hold-over period of 1000 calls before it reverts to idle.
bool RemixingLogic::HandleAnySilentChannels(
    ArrayView<const float, 2> average_energies,
    ArrayView<const int, 2> num_frames_since_activity) {
  RTC_DCHECK(mode_ != Mode::kSilentChannel ||
             mixing_ == StereoMixingVariant::kUseAverage);

  const bool silent_channel_found = SingleSilentChannelDetected(
      num_samples_per_channel_, average_energies, num_frames_since_activity);

  // A detection (re)starts the mode; averaging the channels is used as a safe
  // fallback.
  if (silent_channel_found) {
    num_frames_since_mode_triggered_ = 0;
    mode_ = Mode::kSilentChannel;
    mixing_ = StereoMixingVariant::kUseAverage;
    return true;
  }

  // Nothing detected and the mode is not active: nothing to do.
  if (mode_ != Mode::kSilentChannel) {
    return false;
  }

  // The mode is active but the silent channel is no longer detected. Remain
  // in the mode until the hold-over period has passed.
  constexpr int kNumFramesForModeExit = 10 * 100;
  ++num_frames_since_mode_triggered_;
  if (num_frames_since_mode_triggered_ <= kNumFramesForModeExit) {
    return true;
  }

  mode_ = Mode::kIdle;
  num_frames_since_mode_triggered_ = 0;
  return false;
}
// Runs the handling of moderately imbalanced and saturated channels and
// returns true while that handling is in effect.
//
// A preferred channel that is identified while the mode is inactive, or that
// agrees with the channel already in use, (re)starts the mode and resets the
// frame counter. If the mode is active and no agreeing preference is found,
// the mode is kept for a hold-over period of 300 calls, after which the state
// reverts to idle with the mixing set to the channel average.
bool RemixingLogic::HandleAnyImbalancedAndSaturatedChannels(
    ArrayView<const float, 2> average_energies,
    ArrayView<const float, 2> saturation_factors) {
  RTC_DCHECK(mode_ != Mode::kSaturatedChannel ||
             (mixing_ == StereoMixingVariant::kUseChannel0 ||
              mixing_ == StereoMixingVariant::kUseChannel1));

  const std::optional<int> preferred_channel =
      IdentifyModerateImbalancedAndSaturatedChannel(average_energies,
                                                    saturation_factors);

  if (preferred_channel.has_value()) {
    // Enter the mode, or refresh it when the preference agrees with the
    // single-channel mixing currently in use.
    if (mode_ != Mode::kSaturatedChannel ||
        ChoiceOfChannelMatchesSingleChannelMixing(*preferred_channel,
                                                  mixing_)) {
      num_frames_since_mode_triggered_ = 0;
      const StereoMixingVariant selected =
          *preferred_channel == 0 ? StereoMixingVariant::kUseChannel0
                                  : StereoMixingVariant::kUseChannel1;
      RTC_DCHECK(mode_ != Mode::kSaturatedChannel || selected == mixing_);
      mode_ = Mode::kSaturatedChannel;
      mixing_ = selected;
      return true;
    }
  } else if (mode_ != Mode::kSaturatedChannel) {
    // No preference and the mode is not active: nothing to do.
    return false;
  }

  // The mode is active but the current selection is no longer confirmed.
  // Remain in the mode until the hold-over period has passed.
  constexpr int kNumFramesForModeExit = 300;
  ++num_frames_since_mode_triggered_;
  if (num_frames_since_mode_triggered_ <= kNumFramesForModeExit) {
    return true;
  }

  mode_ = Mode::kIdle;
  num_frames_since_mode_triggered_ = 0;
  mixing_ = StereoMixingVariant::kUseAverage;
  return false;
}
// Runs the handling of largely imbalanced channels and returns true while
// that handling is in effect.
//
// A dominant channel that is identified while the mode is inactive, or that
// agrees with the channel already in use, (re)starts the mode and resets the
// frame counter. If the mode is active and no agreeing imbalance is found,
// the mode is kept for a hold-over period of 300 calls, after which the state
// reverts to idle with the mixing set to the channel average.
bool RemixingLogic::HandleAnyLargelyImbalancedChannels(
    ArrayView<const float, 2> average_energies) {
  RTC_DCHECK(mode_ != Mode::kImbalancedChannels ||
             (mixing_ == StereoMixingVariant::kUseChannel0 ||
              mixing_ == StereoMixingVariant::kUseChannel1));

  const std::optional<int> dominant_channel =
      IdentifyLargelyImbalancedChannel(average_energies);

  if (dominant_channel.has_value()) {
    // Enter the mode, or refresh it when the dominant channel agrees with the
    // single-channel mixing currently in use.
    if (mode_ != Mode::kImbalancedChannels ||
        ChoiceOfChannelMatchesSingleChannelMixing(*dominant_channel,
                                                  mixing_)) {
      num_frames_since_mode_triggered_ = 0;
      mode_ = Mode::kImbalancedChannels;
      if (*dominant_channel == 0) {
        mixing_ = StereoMixingVariant::kUseChannel0;
      } else {
        mixing_ = StereoMixingVariant::kUseChannel1;
      }
      return true;
    }
  } else if (mode_ != Mode::kImbalancedChannels) {
    // No large imbalance and the mode is not active: nothing to do.
    return false;
  }

  // The mode is active but the current selection is no longer confirmed.
  // Remain in the imbalanced-channels mode until the hold-over period has
  // passed.
  constexpr int kNumFramesForModeExit = 300;
  ++num_frames_since_mode_triggered_;
  if (num_frames_since_mode_triggered_ <= kNumFramesForModeExit) {
    return true;
  }

  mode_ = Mode::kIdle;
  num_frames_since_mode_triggered_ = 0;
  mixing_ = StereoMixingVariant::kUseAverage;
  return false;
}
} // namespace webrtc