#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
#include <algorithm>
#include <cmath>
#include "rtc_base/checks.h"
namespace webrtc {
namespace rnn_vad {
// Converts every entry of `kBandFrequencyBoundaries` into an index by scaling
// it with `frame_size_samples / sample_rate_hz`.
//
// The multiplication is carried out before the division, and the result is
// stored as `size_t`. `sample_rate_hz` is used as a divisor and must therefore
// be non-zero.
//
// Returns an array holding one index per band.
std::array<size_t, kNumBands> ComputeBandBoundaryIndexes(
    size_t sample_rate_hz,
    size_t frame_size_samples) {
  std::array<size_t, kNumBands> indexes;
  for (size_t i = 0; i < kNumBands; ++i) {
    indexes[i] =
        kBandFrequencyBoundaries[i] * frame_size_samples / sample_rate_hz;
  }
  return indexes;
}
// Accumulates per-band coefficients from per-frequency-bin values.
//
// `functor` is called once for every visited frequency bin index and returns
// the value of that bin. `band_boundaries` holds the first bin index of each
// band, `max_freq_bin_index` is the highest bin index that may be passed to
// `functor`, and `coefficients` receives the result (it is zeroed first).
//
// Each bin between two consecutive boundaries is split between the two
// neighbouring bands with complementary weights `(1 - w)` and `w`, where `w`
// grows linearly with the distance from the lower boundary.
void ComputeBandCoefficients(
    rtc::FunctionView<float(size_t)> functor,
    rtc::ArrayView<const size_t, kNumBands> band_boundaries,
    size_t max_freq_bin_index,
    rtc::ArrayView<float, kNumBands> coefficients) {
  std::fill(coefficients.begin(), coefficients.end(), 0.f);
  for (size_t i = 0; i < coefficients.size() - 1; ++i) {
    RTC_DCHECK_EQ(0.f, coefficients[i + 1]);
    RTC_DCHECK_GT(band_boundaries[i + 1], band_boundaries[i]);
    const size_t first_freq_bin = band_boundaries[i];
    // Last bin of the band, clamped to the highest available bin.
    const size_t last_freq_bin =
        std::min(max_freq_bin_index, first_freq_bin + band_boundaries[i + 1] -
                                         band_boundaries[i] - 1);
    // Depending on the sample rate, the highest bands can have no FFT
    // coefficients. Stop the iteration when coming across the first empty band.
    if (first_freq_bin >= last_freq_bin) {
      break;
    }
    const size_t band_size = last_freq_bin - first_freq_bin + 1;
    // Compute the band coefficient using a triangular band with peak response
    // at the band boundary.
    for (size_t j = first_freq_bin; j <= last_freq_bin; ++j) {
      const float w = static_cast<float>(j - first_freq_bin) / band_size;
      const float coefficient = functor(j);
      coefficients[i] += (1.f - w) * coefficient;
      coefficients[i + 1] += w * coefficient;
    }
  }
  // The first and the last bands in the loop above only got half contribution.
  coefficients[0] *= 2.f;
  coefficients[coefficients.size() - 1] *= 2.f;
  // TODO: Replace the line above with
  // "coefficients[i] *= 2.f" (*) since we now assume that the last band is
  // always |kNumBands| - 1.
  // (*): "size_t i" must be declared before the main loop.
}
// Computes band energies from complex FFT coefficients.
//
// The value of each frequency bin is `std::norm()` of the corresponding FFT
// coefficient (i.e., its squared magnitude); the per-bin values are then
// combined into `band_energies` by ComputeBandCoefficients() using
// `band_boundaries`.
//
// NOTE(review): `fft_coeffs.size() - 1` is passed as the highest valid bin
// index, so `fft_coeffs` is presumably required to be non-empty - confirm
// against callers.
void ComputeBandEnergies(
    rtc::ArrayView<const std::complex<float>> fft_coeffs,
    rtc::ArrayView<const size_t, kNumBands> band_boundaries,
    rtc::ArrayView<float, kNumBands> band_energies) {
  RTC_DCHECK_EQ(band_boundaries.size(), band_energies.size());
  auto functor = [fft_coeffs](const size_t freq_bin_index) -> float {
    return std::norm(fft_coeffs[freq_bin_index]);
  };
  ComputeBandCoefficients(functor, band_boundaries, fft_coeffs.size() - 1,
                          band_energies);
}
// Converts band energy coefficients to the log10 domain and smooths them
// across frequency bands.
//
// For each band, `log10(1e-2 + energy)` is computed and then floored by two
// running values:
//  - `log_max - 7`, where `log_max` is the largest output produced so far;
//  - `follow - 1.5`, where `follow` tracks the previous output and is lowered
//    by 1.5 at every band unless the current output is larger.
// Both running values start at -2, which equals `log10(1e-2)`.
void ComputeLogBandEnergiesCoefficients(
    rtc::ArrayView<const float, kNumBands> band_energy_coeffs,
    rtc::ArrayView<float, kNumBands> log_band_energy_coeffs) {
  float log_max = -2.f;
  float follow = -2.f;
  for (size_t i = 0; i < band_energy_coeffs.size(); ++i) {
    log_band_energy_coeffs[i] = std::log10(1e-2f + band_energy_coeffs[i]);
    // Smoothing across frequency bands.
    log_band_energy_coeffs[i] = std::max(
        log_max - 7.f, std::max(follow - 1.5f, log_band_energy_coeffs[i]));
    log_max = std::max(log_max, log_band_energy_coeffs[i]);
    follow = std::max(follow - 1.5f, log_band_energy_coeffs[i]);
  }
}
// Builds a `kNumBands` x `kNumBands` DCT table stored as a flat array.
//
// The entry at `i * kNumBands + j` is `cos((i + 0.5) * j * kPi / kNumBands)`.
// The entries with `j == 0` are additionally multiplied by `sqrt(0.5)`.
std::array<float, kNumBands * kNumBands> ComputeDctTable() {
  std::array<float, kNumBands * kNumBands> dct_table;
  const double k = std::sqrt(0.5);
  for (size_t i = 0; i < kNumBands; ++i) {
    for (size_t j = 0; j < kNumBands; ++j) {
      dct_table[i * kNumBands + j] = std::cos((i + 0.5) * j * kPi / kNumBands);
    }
    // Extra scaling for the first (j == 0) entry of each row.
    dct_table[i * kNumBands] *= k;
  }
  return dct_table;
}
// Computes the first `out.size()` DCT coefficients of `in` using a
// pre-computed `dct_table`.
//
// `out[i]` is set to `sqrt(2 / kNumBands) * sum_j in[j] * dct_table[j *
// in.size() + i]`. `out` must hold at least one and at most `in.size()`
// elements and must not alias `in` (in-place operation is not supported).
void ComputeDct(rtc::ArrayView<const float, kNumBands> in,
                rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
                rtc::ArrayView<float> out) {
  RTC_DCHECK_NE(in.data(), out.data()) << "In-place DCT is not supported.";
  RTC_DCHECK_LE(1, out.size());
  RTC_DCHECK_LE(out.size(), in.size());
  std::fill(out.begin(), out.end(), 0.f);
  // DCT scaling factor.
  const float kDctScalingFactor = std::sqrt(2.f / kNumBands);
  for (size_t i = 0; i < out.size(); ++i) {
    for (size_t j = 0; j < in.size(); ++j) {
      out[i] += in[j] * dct_table[j * in.size() + i];
    }
    out[i] *= kDctScalingFactor;
  }
}
} // namespace rnn_vad
} // namespace webrtc