| /* |
| * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include <math.h> |
| #include <string.h> |
| #include <stdlib.h> |
| |
| #include "webrtc/rtc_base/checks.h" |
| #include "webrtc/common_audio/fft4g.h" |
| #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| #include "webrtc/modules/audio_processing/ns/noise_suppression.h" |
| #include "webrtc/modules/audio_processing/ns/ns_core.h" |
| #include "webrtc/modules/audio_processing/ns/windows_private.h" |
| |
| // Set Feature Extraction Parameters. |
| static void set_feature_extraction_parameters(NoiseSuppressionC* self) { |
| // Bin size of histogram. |
| self->featureExtractionParams.binSizeLrt = 0.1f; |
| self->featureExtractionParams.binSizeSpecFlat = 0.05f; |
| self->featureExtractionParams.binSizeSpecDiff = 0.1f; |
| |
| // Range of histogram over which LRT threshold is computed. |
| self->featureExtractionParams.rangeAvgHistLrt = 1.f; |
| |
| // Scale parameters: multiply dominant peaks of the histograms by scale factor |
| // to obtain thresholds for prior model. |
| // For LRT and spectral difference. |
| self->featureExtractionParams.factor1ModelPars = 1.2f; |
  // For spectral flatness: used when noise is flatter than speech.
| self->featureExtractionParams.factor2ModelPars = 0.9f; |
| |
| // Peak limit for spectral flatness (varies between 0 and 1). |
| self->featureExtractionParams.thresPosSpecFlat = 0.6f; |
| |
| // Limit on spacing of two highest peaks in histogram: spacing determined by |
| // bin size. |
| self->featureExtractionParams.limitPeakSpacingSpecFlat = |
| 2 * self->featureExtractionParams.binSizeSpecFlat; |
| self->featureExtractionParams.limitPeakSpacingSpecDiff = |
| 2 * self->featureExtractionParams.binSizeSpecDiff; |
| |
| // Limit on relevance of second peak. |
| self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f; |
| self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f; |
| |
| // Fluctuation limit of LRT feature. |
| self->featureExtractionParams.thresFluctLrt = 0.05f; |
| |
| // Limit on the max and min values for the feature thresholds. |
| self->featureExtractionParams.maxLrt = 1.f; |
| self->featureExtractionParams.minLrt = 0.2f; |
| |
| self->featureExtractionParams.maxSpecFlat = 0.95f; |
| self->featureExtractionParams.minSpecFlat = 0.1f; |
| |
| self->featureExtractionParams.maxSpecDiff = 1.f; |
| self->featureExtractionParams.minSpecDiff = 0.16f; |
| |
  // Criterion on the histogram peak weight to accept/reject a feature.
| self->featureExtractionParams.thresWeightSpecFlat = |
| (int)(0.3 * (self->modelUpdatePars[1])); // For spectral flatness. |
| self->featureExtractionParams.thresWeightSpecDiff = |
| (int)(0.3 * (self->modelUpdatePars[1])); // For spectral difference. |
| } |
| |
| // Initialize state. |
| int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) { |
| int i; |
| // Check for valid pointer. |
| if (self == NULL) { |
| return -1; |
| } |
| |
| // Initialization of struct. |
| if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) { |
| self->fs = fs; |
| } else { |
| return -1; |
| } |
| self->windShift = 0; |
| // We only support 10ms frames. |
| if (fs == 8000) { |
| self->blockLen = 80; |
| self->anaLen = 128; |
| self->window = kBlocks80w128; |
| } else { |
| self->blockLen = 160; |
| self->anaLen = 256; |
| self->window = kBlocks160w256; |
| } |
| self->magnLen = self->anaLen / 2 + 1; // Number of frequency bins. |
| |
| // Initialize FFT work arrays. |
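  // The dummy forward transform below makes WebRtc_rdft() build its
  // bit-reversal (|ip|) and cos/sin (|wfft|) tables up front, so the first
  // real frame does not pay the table-setup cost.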
| self->ip[0] = 0; // Setting this triggers initialization. |
| memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); |
| WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft); |
| |
| memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); |
| memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); |
| memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); |
| |
| // For HB processing. |
| memset(self->dataBufHB, |
| 0, |
| sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX); |
| |
| // For quantile noise estimation. |
| memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { |
| self->lquantile[i] = 8.f; |
| self->density[i] = 0.3f; |
| } |
| |
| for (i = 0; i < SIMULT; i++) { |
| self->counter[i] = |
| (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); |
| } |
| |
| self->updates = 0; |
| |
| // Wiener filter initialization. |
| for (i = 0; i < HALF_ANAL_BLOCKL; i++) { |
| self->smooth[i] = 1.f; |
| } |
| |
| // Set the aggressiveness: default. |
| self->aggrMode = 0; |
| |
  // Initialize variables for the speech/noise probability model.
| self->priorSpeechProb = 0.5f; // Prior prob for speech/noise. |
| // Previous analyze mag spectrum. |
| memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| // Previous process mag spectrum. |
| memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| // Current noise-spectrum. |
| memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| // Previous noise-spectrum. |
| memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| // Conservative noise spectrum estimate. |
| memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| // For estimation of HB in second pass. |
| memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| // Initial average magnitude spectrum. |
| memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL); |
| for (i = 0; i < HALF_ANAL_BLOCKL; i++) { |
| // Smooth LR (same as threshold). |
| self->logLrtTimeAvg[i] = LRT_FEATURE_THR; |
| } |
| |
| // Feature quantities. |
| // Spectral flatness (start on threshold). |
| self->featureData[0] = SF_FEATURE_THR; |
| self->featureData[1] = 0.f; // Spectral entropy: not used in this version. |
| self->featureData[2] = 0.f; // Spectral variance: not used in this version. |
| // Average LRT factor (start on threshold). |
| self->featureData[3] = LRT_FEATURE_THR; |
| // Spectral template diff (start on threshold). |
| self->featureData[4] = SF_FEATURE_THR; |
| self->featureData[5] = 0.f; // Normalization for spectral difference. |
| // Window time-average of input magnitude spectrum. |
| self->featureData[6] = 0.f; |
| |
| // Histogram quantities: used to estimate/update thresholds for features. |
| memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST); |
| memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST); |
| memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST); |
| |
| |
| self->blockInd = -1; // Frame counter. |
| // Default threshold for LRT feature. |
| self->priorModelPars[0] = LRT_FEATURE_THR; |
| // Threshold for spectral flatness: determined on-line. |
| self->priorModelPars[1] = 0.5f; |
| // sgn_map par for spectral measure: 1 for flatness measure. |
| self->priorModelPars[2] = 1.f; |
| // Threshold for template-difference feature: determined on-line. |
| self->priorModelPars[3] = 0.5f; |
| // Default weighting parameter for LRT feature. |
| self->priorModelPars[4] = 1.f; |
| // Default weighting parameter for spectral flatness feature. |
| self->priorModelPars[5] = 0.f; |
| // Default weighting parameter for spectral difference feature. |
| self->priorModelPars[6] = 0.f; |
| |
| // Update flag for parameters: |
  // 0 = no update, 1 = update once, 2 = update every window.
| self->modelUpdatePars[0] = 2; |
| self->modelUpdatePars[1] = 500; // Window for update. |
| // Counter for update of conservative noise spectrum. |
| self->modelUpdatePars[2] = 0; |
  // Countdown counter until the feature thresholds are next updated.
| self->modelUpdatePars[3] = self->modelUpdatePars[1]; |
| |
| self->signalEnergy = 0.0; |
| self->sumMagn = 0.0; |
| self->whiteNoiseLevel = 0.0; |
| self->pinkNoiseNumerator = 0.0; |
| self->pinkNoiseExp = 0.0; |
| |
| set_feature_extraction_parameters(self); |
| |
| // Default mode. |
| WebRtcNs_set_policy_core(self, 0); |
| |
| self->initFlag = 1; |
| return 0; |
| } |
| |
| // Estimate noise. |
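// The per-bin noise magnitude is tracked as a quantile of the log magnitude
// spectrum using SIMULT staggered estimators. Each estimator applies a
// stochastic-approximation update: the log quantile is raised by
// QUANTILE * delta / (counter + 1) when the current log magnitude lies above
// it and lowered by (1 - QUANTILE) * delta / (counter + 1) otherwise, with
// the step size |delta| shrinking as the local density estimate grows.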
| static void NoiseEstimation(NoiseSuppressionC* self, |
| float* magn, |
| float* noise) { |
| size_t i, s, offset; |
| float lmagn[HALF_ANAL_BLOCKL], delta; |
| |
| if (self->updates < END_STARTUP_LONG) { |
| self->updates++; |
| } |
| |
| for (i = 0; i < self->magnLen; i++) { |
| lmagn[i] = (float)log(magn[i]); |
| } |
| |
| // Loop over simultaneous estimates. |
| for (s = 0; s < SIMULT; s++) { |
| offset = s * self->magnLen; |
| |
    // Quantile estimate update (newquantest(...)).
| for (i = 0; i < self->magnLen; i++) { |
| // Compute delta. |
| if (self->density[offset + i] > 1.0) { |
| delta = FACTOR * 1.f / self->density[offset + i]; |
| } else { |
| delta = FACTOR; |
| } |
| |
| // Update log quantile estimate. |
| if (lmagn[i] > self->lquantile[offset + i]) { |
| self->lquantile[offset + i] += |
| QUANTILE * delta / (float)(self->counter[s] + 1); |
| } else { |
| self->lquantile[offset + i] -= |
| (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1); |
| } |
| |
| // Update density estimate. |
| if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) { |
| self->density[offset + i] = |
| ((float)self->counter[s] * self->density[offset + i] + |
| 1.f / (2.f * WIDTH)) / |
| (float)(self->counter[s] + 1); |
| } |
| } // End loop over magnitude spectrum. |
| |
| if (self->counter[s] >= END_STARTUP_LONG) { |
| self->counter[s] = 0; |
| if (self->updates >= END_STARTUP_LONG) { |
| for (i = 0; i < self->magnLen; i++) { |
| self->quantile[i] = (float)exp(self->lquantile[offset + i]); |
| } |
| } |
| } |
| |
| self->counter[s]++; |
| } // End loop over simultaneous estimates. |
| |
| // Sequentially update the noise during startup. |
| if (self->updates < END_STARTUP_LONG) { |
    // Use the last "s" to get a noise estimate during startup that differs
    // from zero.
| for (i = 0; i < self->magnLen; i++) { |
| self->quantile[i] = (float)exp(self->lquantile[offset + i]); |
| } |
| } |
| |
| for (i = 0; i < self->magnLen; i++) { |
| noise[i] = self->quantile[i]; |
| } |
| } |
| |
| // Extract thresholds for feature parameters. |
| // Histograms are computed over some window size (given by |
| // self->modelUpdatePars[1]). |
| // Thresholds and weights are extracted every window. |
| // |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights. |
| // Threshold and weights are returned in: self->priorModelPars. |
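// In outline (see the code below): the LRT threshold is factor1ModelPars
// times the histogram average over rangeAvgHistLrt (or maxLrt if the LRT
// fluctuation is very low), the flatness threshold is factor2ModelPars times
// the position of the dominant flatness peak, and the difference threshold
// is factor1ModelPars times the position of the dominant difference peak;
// all are clamped to their min/max limits, and features whose peaks are too
// weak are given zero weight.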
| static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) { |
| int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; |
| int maxPeak1, maxPeak2; |
| int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, |
| weightPeak2SpecDiff; |
| |
| float binMid, featureSum; |
| float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; |
| float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; |
| |
| // 3 features: LRT, flatness, difference. |
| // lrt_feature = self->featureData[3]; |
| // flat_feature = self->featureData[0]; |
| // diff_feature = self->featureData[4]; |
| |
| // Update histograms. |
| if (flag == 0) { |
| // LRT |
| if ((self->featureData[3] < |
| HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) && |
| (self->featureData[3] >= 0.0)) { |
| i = (int)(self->featureData[3] / |
| self->featureExtractionParams.binSizeLrt); |
| self->histLrt[i]++; |
| } |
| // Spectral flatness. |
| if ((self->featureData[0] < |
| HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) && |
| (self->featureData[0] >= 0.0)) { |
| i = (int)(self->featureData[0] / |
| self->featureExtractionParams.binSizeSpecFlat); |
| self->histSpecFlat[i]++; |
| } |
| // Spectral difference. |
| if ((self->featureData[4] < |
| HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) && |
| (self->featureData[4] >= 0.0)) { |
| i = (int)(self->featureData[4] / |
| self->featureExtractionParams.binSizeSpecDiff); |
| self->histSpecDiff[i]++; |
| } |
| } |
| |
| // Extract parameters for speech/noise probability. |
| if (flag == 1) { |
| // LRT feature: compute the average over |
| // self->featureExtractionParams.rangeAvgHistLrt. |
| avgHistLrt = 0.0; |
| avgHistLrtCompl = 0.0; |
| avgSquareHistLrt = 0.0; |
| numHistLrt = 0; |
| for (i = 0; i < HIST_PAR_EST; i++) { |
| binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt; |
| if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) { |
| avgHistLrt += self->histLrt[i] * binMid; |
| numHistLrt += self->histLrt[i]; |
| } |
| avgSquareHistLrt += self->histLrt[i] * binMid * binMid; |
| avgHistLrtCompl += self->histLrt[i] * binMid; |
| } |
| if (numHistLrt > 0) { |
| avgHistLrt = avgHistLrt / ((float)numHistLrt); |
| } |
| avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]); |
| avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]); |
| fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; |
| // Get threshold for LRT feature. |
| if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { |
| // Very low fluctuation, so likely noise. |
| self->priorModelPars[0] = self->featureExtractionParams.maxLrt; |
| } else { |
| self->priorModelPars[0] = |
| self->featureExtractionParams.factor1ModelPars * avgHistLrt; |
| // Check if value is within min/max range. |
| if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) { |
| self->priorModelPars[0] = self->featureExtractionParams.minLrt; |
| } |
| if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) { |
| self->priorModelPars[0] = self->featureExtractionParams.maxLrt; |
| } |
| } |
| // Done with LRT feature. |
| |
| // For spectral flatness and spectral difference: compute the main peaks of |
| // histogram. |
| maxPeak1 = 0; |
| maxPeak2 = 0; |
| posPeak1SpecFlat = 0.0; |
| posPeak2SpecFlat = 0.0; |
| weightPeak1SpecFlat = 0; |
| weightPeak2SpecFlat = 0; |
| |
| // Peaks for flatness. |
| for (i = 0; i < HIST_PAR_EST; i++) { |
      binMid =
          ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat;
| if (self->histSpecFlat[i] > maxPeak1) { |
| // Found new "first" peak. |
| maxPeak2 = maxPeak1; |
| weightPeak2SpecFlat = weightPeak1SpecFlat; |
| posPeak2SpecFlat = posPeak1SpecFlat; |
| |
| maxPeak1 = self->histSpecFlat[i]; |
| weightPeak1SpecFlat = self->histSpecFlat[i]; |
| posPeak1SpecFlat = binMid; |
| } else if (self->histSpecFlat[i] > maxPeak2) { |
| // Found new "second" peak. |
| maxPeak2 = self->histSpecFlat[i]; |
| weightPeak2SpecFlat = self->histSpecFlat[i]; |
| posPeak2SpecFlat = binMid; |
| } |
| } |
| |
| // Compute two peaks for spectral difference. |
| maxPeak1 = 0; |
| maxPeak2 = 0; |
| posPeak1SpecDiff = 0.0; |
| posPeak2SpecDiff = 0.0; |
| weightPeak1SpecDiff = 0; |
| weightPeak2SpecDiff = 0; |
| // Peaks for spectral difference. |
| for (i = 0; i < HIST_PAR_EST; i++) { |
| binMid = |
| ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff; |
| if (self->histSpecDiff[i] > maxPeak1) { |
| // Found new "first" peak. |
| maxPeak2 = maxPeak1; |
| weightPeak2SpecDiff = weightPeak1SpecDiff; |
| posPeak2SpecDiff = posPeak1SpecDiff; |
| |
| maxPeak1 = self->histSpecDiff[i]; |
| weightPeak1SpecDiff = self->histSpecDiff[i]; |
| posPeak1SpecDiff = binMid; |
| } else if (self->histSpecDiff[i] > maxPeak2) { |
| // Found new "second" peak. |
| maxPeak2 = self->histSpecDiff[i]; |
| weightPeak2SpecDiff = self->histSpecDiff[i]; |
| posPeak2SpecDiff = binMid; |
| } |
| } |
| |
| // For spectrum flatness feature. |
| useFeatureSpecFlat = 1; |
| // Merge the two peaks if they are close. |
| if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) < |
| self->featureExtractionParams.limitPeakSpacingSpecFlat) && |
| (weightPeak2SpecFlat > |
| self->featureExtractionParams.limitPeakWeightsSpecFlat * |
| weightPeak1SpecFlat)) { |
| weightPeak1SpecFlat += weightPeak2SpecFlat; |
| posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat); |
| } |
    // Reject if the peak weight is not large enough, or the peak position is
    // too low.
| if (weightPeak1SpecFlat < |
| self->featureExtractionParams.thresWeightSpecFlat || |
| posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) { |
| useFeatureSpecFlat = 0; |
| } |
| // If selected, get the threshold. |
| if (useFeatureSpecFlat == 1) { |
| // Compute the threshold. |
| self->priorModelPars[1] = |
| self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat; |
| // Check if value is within min/max range. |
| if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) { |
| self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat; |
| } |
| if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) { |
| self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat; |
| } |
| } |
| // Done with flatness feature. |
| |
| // For template feature. |
| useFeatureSpecDiff = 1; |
| // Merge the two peaks if they are close. |
| if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) < |
| self->featureExtractionParams.limitPeakSpacingSpecDiff) && |
| (weightPeak2SpecDiff > |
| self->featureExtractionParams.limitPeakWeightsSpecDiff * |
| weightPeak1SpecDiff)) { |
| weightPeak1SpecDiff += weightPeak2SpecDiff; |
| posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff); |
| } |
| // Get the threshold value. |
| self->priorModelPars[3] = |
| self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff; |
| // Reject if weight of peaks is not large enough. |
| if (weightPeak1SpecDiff < |
| self->featureExtractionParams.thresWeightSpecDiff) { |
| useFeatureSpecDiff = 0; |
| } |
| // Check if value is within min/max range. |
| if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) { |
| self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff; |
| } |
| if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) { |
| self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff; |
| } |
| // Done with spectral difference feature. |
| |
| // Don't use template feature if fluctuation of LRT feature is very low: |
| // most likely just noise state. |
| if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { |
| useFeatureSpecDiff = 0; |
| } |
| |
| // Select the weights between the features. |
| // self->priorModelPars[4] is weight for LRT: always selected. |
| // self->priorModelPars[5] is weight for spectral flatness. |
| // self->priorModelPars[6] is weight for spectral difference. |
| featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); |
| self->priorModelPars[4] = 1.f / featureSum; |
| self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; |
| self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; |
| |
| // Set hists to zero for next update. |
| if (self->modelUpdatePars[0] >= 1) { |
| for (i = 0; i < HIST_PAR_EST; i++) { |
| self->histLrt[i] = 0; |
| self->histSpecFlat[i] = 0; |
| self->histSpecDiff[i] = 0; |
| } |
| } |
| } // End of flag == 1. |
| } |
| |
| // Compute spectral flatness on input spectrum. |
| // |magnIn| is the magnitude spectrum. |
| // Spectral flatness is returned in self->featureData[0]. |
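// Flatness is the ratio of the geometric to the arithmetic mean of the
// magnitudes (with the first |shiftLP| bin(s) removed from the sum), i.e.
//   exp(mean(log(magnIn))) / mean(magnIn),
// smoothed over time with the SPECT_FL_TAVG constant.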
| static void ComputeSpectralFlatness(NoiseSuppressionC* self, |
| const float* magnIn) { |
| size_t i; |
| size_t shiftLP = 1; // Option to remove first bin(s) from spectral measures. |
| float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; |
| |
| // Compute spectral measures. |
| // For flatness. |
| avgSpectralFlatnessNum = 0.0; |
| avgSpectralFlatnessDen = self->sumMagn; |
| for (i = 0; i < shiftLP; i++) { |
| avgSpectralFlatnessDen -= magnIn[i]; |
| } |
| // Compute log of ratio of the geometric to arithmetic mean: check for log(0) |
| // case. |
| for (i = shiftLP; i < self->magnLen; i++) { |
| if (magnIn[i] > 0.0) { |
| avgSpectralFlatnessNum += (float)log(magnIn[i]); |
| } else { |
| self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0]; |
| return; |
| } |
| } |
| // Normalize. |
| avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen; |
| avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen; |
| |
| // Ratio and inverse log: check for case of log(0). |
| spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; |
| |
| // Time-avg update of spectral flatness feature. |
| self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]); |
| // Done with flatness feature. |
| } |
| |
| // Compute prior and post SNR based on quantile noise estimation. |
| // Compute DD estimate of prior SNR. |
| // Inputs: |
| // * |magn| is the signal magnitude spectrum estimate. |
| // * |noise| is the magnitude noise spectrum estimate. |
| // Outputs: |
| // * |snrLocPrior| is the computed prior SNR. |
| // * |snrLocPost| is the computed post SNR. |
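// Per frequency bin k, the decision-directed (DD) update below is
//   snrLocPrior[k] = DD_PR_SNR * previousEstimateStsa[k] +
//                    (1 - DD_PR_SNR) * snrLocPost[k],
// where previousEstimateStsa[k] = magnPrevAnalyze[k] * smooth[k] /
// noisePrev[k] and snrLocPost[k] = max(magn[k] / noise[k] - 1, 0).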
| static void ComputeSnr(const NoiseSuppressionC* self, |
| const float* magn, |
| const float* noise, |
| float* snrLocPrior, |
| float* snrLocPost) { |
| size_t i; |
| |
| for (i = 0; i < self->magnLen; i++) { |
| // Previous post SNR. |
| // Previous estimate: based on previous frame with gain filter. |
| float previousEstimateStsa = self->magnPrevAnalyze[i] / |
| (self->noisePrev[i] + 0.0001f) * self->smooth[i]; |
| // Post SNR. |
| snrLocPost[i] = 0.f; |
| if (magn[i] > noise[i]) { |
| snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f; |
| } |
| // DD estimate is sum of two terms: current estimate and previous estimate. |
| // Directed decision update of snrPrior. |
| snrLocPrior[i] = |
| DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i]; |
| } // End of loop over frequencies. |
| } |
| |
| // Compute the difference measure between input spectrum and a template/learned |
| // noise spectrum. |
| // |magnIn| is the input spectrum. |
| // The reference/template spectrum is self->magnAvgPause[i]. |
| // Returns (normalized) spectral difference in self->featureData[4]. |
| static void ComputeSpectralDifference(NoiseSuppressionC* self, |
| const float* magnIn) { |
| // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / |
| // var(magnAvgPause) |
| size_t i; |
| float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; |
| |
| avgPause = 0.0; |
| avgMagn = self->sumMagn; |
| // Compute average quantities. |
| for (i = 0; i < self->magnLen; i++) { |
| // Conservative smooth noise spectrum from pause frames. |
| avgPause += self->magnAvgPause[i]; |
| } |
| avgPause /= self->magnLen; |
| avgMagn /= self->magnLen; |
| |
| covMagnPause = 0.0; |
| varPause = 0.0; |
| varMagn = 0.0; |
| // Compute variance and covariance quantities. |
| for (i = 0; i < self->magnLen; i++) { |
| covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause); |
| varPause += |
| (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause); |
| varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); |
| } |
| covMagnPause /= self->magnLen; |
| varPause /= self->magnLen; |
| varMagn /= self->magnLen; |
| // Update of average magnitude spectrum. |
| self->featureData[6] += self->signalEnergy; |
| |
| avgDiffNormMagn = |
| varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f); |
| // Normalize and compute time-avg update of difference feature. |
| avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f)); |
| self->featureData[4] += |
| SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]); |
| } |
| |
| // Compute speech/noise probability. |
| // Speech/noise probability is returned in |probSpeechFinal|. |
| // |magn| is the input magnitude spectrum. |
| // |noise| is the noise spectrum. |
| // |snrLocPrior| is the prior SNR for each frequency. |
| // |snrLocPost| is the post SNR for each frequency. |
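// The smoothed per-bin log LRT is averaged over frequency and combined with
// the spectral flatness and spectral difference features through tanh
// (sigmoid) maps, weighted by priorModelPars[4..6], to update the prior
// speech probability. The final per-bin probability is
//   P(speech | k) = 1 / (1 + (1 - prior) / prior * exp(-logLrtTimeAvg[k])).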
| static void SpeechNoiseProb(NoiseSuppressionC* self, |
| float* probSpeechFinal, |
| const float* snrLocPrior, |
| const float* snrLocPost) { |
| size_t i; |
| int sgnMap; |
| float invLrt, gainPrior, indPrior; |
| float logLrtTimeAvgKsum, besselTmp; |
| float indicator0, indicator1, indicator2; |
| float tmpFloat1, tmpFloat2; |
| float weightIndPrior0, weightIndPrior1, weightIndPrior2; |
| float threshPrior0, threshPrior1, threshPrior2; |
| float widthPrior, widthPrior0, widthPrior1, widthPrior2; |
| |
| widthPrior0 = WIDTH_PR_MAP; |
| // Width for pause region: lower range, so increase width in tanh map. |
| widthPrior1 = 2.f * WIDTH_PR_MAP; |
| widthPrior2 = 2.f * WIDTH_PR_MAP; // For spectral-difference measure. |
| |
| // Threshold parameters for features. |
| threshPrior0 = self->priorModelPars[0]; |
| threshPrior1 = self->priorModelPars[1]; |
| threshPrior2 = self->priorModelPars[3]; |
| |
| // Sign for flatness feature. |
| sgnMap = (int)(self->priorModelPars[2]); |
| |
| // Weight parameters for features. |
| weightIndPrior0 = self->priorModelPars[4]; |
| weightIndPrior1 = self->priorModelPars[5]; |
| weightIndPrior2 = self->priorModelPars[6]; |
| |
| // Compute feature based on average LR factor. |
| // This is the average over all frequencies of the smooth log LRT. |
| logLrtTimeAvgKsum = 0.0; |
| for (i = 0; i < self->magnLen; i++) { |
| tmpFloat1 = 1.f + 2.f * snrLocPrior[i]; |
| tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f); |
| besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2; |
| self->logLrtTimeAvg[i] += |
| LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]); |
| logLrtTimeAvgKsum += self->logLrtTimeAvg[i]; |
| } |
| logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen); |
| self->featureData[3] = logLrtTimeAvgKsum; |
| // Done with computation of LR factor. |
| |
| // Compute the indicator functions. |
| // Average LRT feature. |
| widthPrior = widthPrior0; |
| // Use larger width in tanh map for pause regions. |
| if (logLrtTimeAvgKsum < threshPrior0) { |
| widthPrior = widthPrior1; |
| } |
| // Compute indicator function: sigmoid map. |
| indicator0 = |
| 0.5f * |
| ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f); |
| |
| // Spectral flatness feature. |
| tmpFloat1 = self->featureData[0]; |
| widthPrior = widthPrior0; |
| // Use larger width in tanh map for pause regions. |
| if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { |
| widthPrior = widthPrior1; |
| } |
| if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { |
| widthPrior = widthPrior1; |
| } |
| // Compute indicator function: sigmoid map. |
| indicator1 = |
| 0.5f * |
| ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + |
| 1.f); |
| |
| // For template spectrum-difference. |
| tmpFloat1 = self->featureData[4]; |
| widthPrior = widthPrior0; |
| // Use larger width in tanh map for pause regions. |
| if (tmpFloat1 < threshPrior2) { |
| widthPrior = widthPrior2; |
| } |
| // Compute indicator function: sigmoid map. |
| indicator2 = |
| 0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f); |
| |
| // Combine the indicator function with the feature weights. |
| indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + |
| weightIndPrior2 * indicator2; |
| // Done with computing indicator function. |
| |
| // Compute the prior probability. |
| self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb); |
  // Make sure probabilities are within range: keep the floor at 0.01.
| if (self->priorSpeechProb > 1.f) { |
| self->priorSpeechProb = 1.f; |
| } |
| if (self->priorSpeechProb < 0.01f) { |
| self->priorSpeechProb = 0.01f; |
| } |
| |
  // Final speech probability: combine prior model with LR factor.
| gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f); |
| for (i = 0; i < self->magnLen; i++) { |
| invLrt = (float)exp(-self->logLrtTimeAvg[i]); |
| invLrt = (float)gainPrior * invLrt; |
| probSpeechFinal[i] = 1.f / (1.f + invLrt); |
| } |
| } |
| |
| // Update the noise features. |
| // Inputs: |
| // * |magn| is the signal magnitude spectrum estimate. |
| // * |updateParsFlag| is an update flag for parameters. |
| static void FeatureUpdate(NoiseSuppressionC* self, |
| const float* magn, |
| int updateParsFlag) { |
| // Compute spectral flatness on input spectrum. |
| ComputeSpectralFlatness(self, magn); |
| // Compute difference of input spectrum with learned/estimated noise spectrum. |
| ComputeSpectralDifference(self, magn); |
| // Compute histograms for parameter decisions (thresholds and weights for |
| // features). |
| // Parameters are extracted once every window time. |
| // (=self->modelUpdatePars[1]) |
| if (updateParsFlag >= 1) { |
| // Counter update. |
| self->modelUpdatePars[3]--; |
| // Update histogram. |
| if (self->modelUpdatePars[3] > 0) { |
| FeatureParameterExtraction(self, 0); |
| } |
| // Compute model parameters. |
| if (self->modelUpdatePars[3] == 0) { |
| FeatureParameterExtraction(self, 1); |
| self->modelUpdatePars[3] = self->modelUpdatePars[1]; |
      // If we only wish to update once, set the flag to zero.
| if (updateParsFlag == 1) { |
| self->modelUpdatePars[0] = 0; |
| } else { |
| // Update every window: |
| // Get normalization for spectral difference for next window estimate. |
| self->featureData[6] = |
| self->featureData[6] / ((float)self->modelUpdatePars[1]); |
| self->featureData[5] = |
| 0.5f * (self->featureData[6] + self->featureData[5]); |
| self->featureData[6] = 0.f; |
| } |
| } |
| } |
| } |
| |
| // Update the noise estimate. |
| // Inputs: |
| // * |magn| is the signal magnitude spectrum estimate. |
| // * |snrLocPrior| is the prior SNR. |
| // * |snrLocPost| is the post SNR. |
| // Output: |
| // * |noise| is the updated noise magnitude spectrum estimate. |
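// Per bin the update below is the recursive average
//   noise[k] = gamma * noisePrev[k] +
//              (1 - gamma) * (probNonSpeech * magn[k] +
//                             probSpeech * noisePrev[k]),
// with gamma = NOISE_UPDATE normally and the slower SPEECH_UPDATE when the
// bin is likely speech (probSpeech > PROB_RANGE). Updates that lower the
// noise estimate are always accepted.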
| static void UpdateNoiseEstimate(NoiseSuppressionC* self, |
| const float* magn, |
| const float* snrLocPrior, |
| const float* snrLocPost, |
| float* noise) { |
| size_t i; |
| float probSpeech, probNonSpeech; |
| // Time-avg parameter for noise update. |
| float gammaNoiseTmp = NOISE_UPDATE; |
| float gammaNoiseOld; |
| float noiseUpdateTmp; |
| |
| for (i = 0; i < self->magnLen; i++) { |
| probSpeech = self->speechProb[i]; |
| probNonSpeech = 1.f - probSpeech; |
| // Temporary noise update: |
| // Use it for speech frames if update value is less than previous. |
| noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] + |
| (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + |
| probSpeech * self->noisePrev[i]); |
| // Time-constant based on speech/noise state. |
| gammaNoiseOld = gammaNoiseTmp; |
| gammaNoiseTmp = NOISE_UPDATE; |
| // Increase gamma (i.e., less noise update) for frame likely to be speech. |
| if (probSpeech > PROB_RANGE) { |
| gammaNoiseTmp = SPEECH_UPDATE; |
| } |
| // Conservative noise update. |
| if (probSpeech < PROB_RANGE) { |
| self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]); |
| } |
| // Noise update. |
| if (gammaNoiseTmp == gammaNoiseOld) { |
| noise[i] = noiseUpdateTmp; |
| } else { |
| noise[i] = gammaNoiseTmp * self->noisePrev[i] + |
| (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + |
| probSpeech * self->noisePrev[i]); |
| // Allow for noise update downwards: |
| // If noise update decreases the noise, it is safe, so allow it to |
| // happen. |
| if (noiseUpdateTmp < noise[i]) { |
| noise[i] = noiseUpdateTmp; |
| } |
| } |
| } // End of freq loop. |
| } |
| |
| // Updates |buffer| with a new |frame|. |
| // Inputs: |
| // * |frame| is a new speech frame or NULL for setting to zero. |
| // * |frame_length| is the length of the new frame. |
| // * |buffer_length| is the length of the buffer. |
| // Output: |
| // * |buffer| is the updated buffer. |
| static void UpdateBuffer(const float* frame, |
| size_t frame_length, |
| size_t buffer_length, |
| float* buffer) { |
| RTC_DCHECK_LT(buffer_length, 2 * frame_length); |
| |
| memcpy(buffer, |
| buffer + frame_length, |
| sizeof(*buffer) * (buffer_length - frame_length)); |
| if (frame) { |
| memcpy(buffer + buffer_length - frame_length, |
| frame, |
| sizeof(*buffer) * frame_length); |
| } else { |
| memset(buffer + buffer_length - frame_length, |
| 0, |
| sizeof(*buffer) * frame_length); |
| } |
| } |
| |
| // Transforms the signal from time to frequency domain. |
| // Inputs: |
| // * |time_data| is the signal in the time domain. |
| // * |time_data_length| is the length of the analysis buffer. |
| // * |magnitude_length| is the length of the spectrum magnitude, which equals |
| // the length of both |real| and |imag| (time_data_length / 2 + 1). |
| // Outputs: |
| // * |time_data| is the signal in the frequency domain. |
| // * |real| is the real part of the frequency domain. |
| // * |imag| is the imaginary part of the frequency domain. |
| // * |magn| is the calculated signal magnitude in the frequency domain. |
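// WebRtc_rdft() packs the real FFT in place: time_data[0] holds the DC
// component, time_data[1] the component at the Nyquist frequency, and the
// rest interleaved real/imaginary pairs for bins 1..magnitude_length - 2,
// which is how they are unpacked below. A bias of 1.f is added to every
// magnitude, which also keeps the log and division operations downstream
// away from zero.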
| static void FFT(NoiseSuppressionC* self, |
| float* time_data, |
| size_t time_data_length, |
| size_t magnitude_length, |
| float* real, |
| float* imag, |
| float* magn) { |
| size_t i; |
| |
| RTC_DCHECK_EQ(magnitude_length, time_data_length / 2 + 1); |
| |
| WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft); |
| |
| imag[0] = 0; |
| real[0] = time_data[0]; |
| magn[0] = fabsf(real[0]) + 1.f; |
| imag[magnitude_length - 1] = 0; |
| real[magnitude_length - 1] = time_data[1]; |
| magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f; |
| for (i = 1; i < magnitude_length - 1; ++i) { |
| real[i] = time_data[2 * i]; |
| imag[i] = time_data[2 * i + 1]; |
| // Magnitude spectrum. |
| magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f; |
| } |
| } |
| |
| // Transforms the signal from frequency to time domain. |
| // Inputs: |
| // * |real| is the real part of the frequency domain. |
| // * |imag| is the imaginary part of the frequency domain. |
| // * |magnitude_length| is the length of the spectrum magnitude, which equals |
| // the length of both |real| and |imag|. |
| // * |time_data_length| is the length of the analysis buffer |
| // (2 * (magnitude_length - 1)). |
| // Output: |
| // * |time_data| is the signal in the time domain. |
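// The 2.f / time_data_length factor applied after the inverse transform
// compensates for the scaling convention of WebRtc_rdft(), so that a forward
// FFT followed by this IFFT reproduces the original samples.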
| static void IFFT(NoiseSuppressionC* self, |
| const float* real, |
| const float* imag, |
| size_t magnitude_length, |
| size_t time_data_length, |
| float* time_data) { |
| size_t i; |
| |
| RTC_DCHECK_EQ(time_data_length, 2 * (magnitude_length - 1)); |
| |
| time_data[0] = real[0]; |
| time_data[1] = real[magnitude_length - 1]; |
| for (i = 1; i < magnitude_length - 1; ++i) { |
| time_data[2 * i] = real[i]; |
| time_data[2 * i + 1] = imag[i]; |
| } |
| WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft); |
| |
| for (i = 0; i < time_data_length; ++i) { |
| time_data[i] *= 2.f / time_data_length; // FFT scaling. |
| } |
| } |
| |
| // Calculates the energy of a buffer. |
| // Inputs: |
| // * |buffer| is the buffer over which the energy is calculated. |
| // * |length| is the length of the buffer. |
| // Returns the calculated energy. |
| static float Energy(const float* buffer, size_t length) { |
| size_t i; |
| float energy = 0.f; |
| |
| for (i = 0; i < length; ++i) { |
| energy += buffer[i] * buffer[i]; |
| } |
| |
| return energy; |
| } |
| |
| // Windows a buffer. |
| // Inputs: |
| // * |window| is the window by which to multiply. |
| // * |data| is the data without windowing. |
| // * |length| is the length of the window and data. |
| // Output: |
| // * |data_windowed| is the windowed data. |
| static void Windowing(const float* window, |
| const float* data, |
| size_t length, |
| float* data_windowed) { |
| size_t i; |
| |
| for (i = 0; i < length; ++i) { |
| data_windowed[i] = window[i] * data[i]; |
| } |
| } |
| |
| // Estimate prior SNR decision-directed and compute DD based Wiener Filter. |
| // Input: |
| // * |magn| is the signal magnitude spectrum estimate. |
| // Output: |
| // * |theFilter| is the frequency response of the computed Wiener filter. |
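// Per bin the gain below is
//   theFilter[k] = snrPrior / (overdrive + snrPrior),
// with snrPrior from the decision-directed update, so a larger |overdrive|
// (set by the suppression policy) gives a smaller gain for the same prior
// SNR, i.e. stronger suppression.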
| static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self, |
| const float* magn, |
| float* theFilter) { |
| size_t i; |
| float snrPrior, previousEstimateStsa, currentEstimateStsa; |
| |
| for (i = 0; i < self->magnLen; i++) { |
| // Previous estimate: based on previous frame with gain filter. |
| previousEstimateStsa = self->magnPrevProcess[i] / |
| (self->noisePrev[i] + 0.0001f) * self->smooth[i]; |
| // Post and prior SNR. |
| currentEstimateStsa = 0.f; |
| if (magn[i] > self->noise[i]) { |
| currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f; |
| } |
| // DD estimate is sum of two terms: current estimate and previous estimate. |
| // Directed decision update of |snrPrior|. |
| snrPrior = DD_PR_SNR * previousEstimateStsa + |
| (1.f - DD_PR_SNR) * currentEstimateStsa; |
| // Gain filter. |
| theFilter[i] = snrPrior / (self->overdrive + snrPrior); |
| } // End of loop over frequencies. |
| } |
| |
| // Changes the aggressiveness of the noise suppression method. |
// |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB), |mode| = 2 is
// aggressive (15dB) and |mode| = 3 is the most aggressive setting.
| // Returns 0 on success and -1 otherwise. |
| int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) { |
| // Allow for modes: 0, 1, 2, 3. |
| if (mode < 0 || mode > 3) { |
| return (-1); |
| } |
| |
| self->aggrMode = mode; |
| if (mode == 0) { |
| self->overdrive = 1.f; |
| self->denoiseBound = 0.5f; |
| self->gainmap = 0; |
| } else if (mode == 1) { |
| // self->overdrive = 1.25f; |
| self->overdrive = 1.f; |
| self->denoiseBound = 0.25f; |
| self->gainmap = 1; |
| } else if (mode == 2) { |
| // self->overdrive = 1.25f; |
| self->overdrive = 1.1f; |
| self->denoiseBound = 0.125f; |
| self->gainmap = 1; |
| } else if (mode == 3) { |
| // self->overdrive = 1.3f; |
| self->overdrive = 1.25f; |
| self->denoiseBound = 0.09f; |
| self->gainmap = 1; |
| } |
| return 0; |
| } |
| |
| void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) { |
| size_t i; |
| const size_t kStartBand = 5; // Skip first frequency bins during estimation. |
| int updateParsFlag; |
| float energy; |
| float signalEnergy = 0.f; |
| float sumMagn = 0.f; |
| float tmpFloat1, tmpFloat2, tmpFloat3; |
| float winData[ANAL_BLOCKL_MAX]; |
| float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; |
| float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; |
| float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; |
| // Variables during startup. |
| float sum_log_i = 0.0; |
| float sum_log_i_square = 0.0; |
| float sum_log_magn = 0.0; |
| float sum_log_i_log_magn = 0.0; |
| float parametric_exp = 0.0; |
| float parametric_num = 0.0; |
| |
  // Check that initialization has been done.
| RTC_DCHECK_EQ(1, self->initFlag); |
| updateParsFlag = self->modelUpdatePars[0]; |
| |
| // Update analysis buffer for L band. |
| UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf); |
| |
| Windowing(self->window, self->analyzeBuf, self->anaLen, winData); |
| energy = Energy(winData, self->anaLen); |
| if (energy == 0.0) { |
| // We want to avoid updating statistics in this case: |
| // Updating feature statistics when we have zeros only will cause |
| // thresholds to move towards zero signal situations. This in turn has the |
| // effect that once the signal is "turned on" (non-zero values) everything |
| // will be treated as speech and there is no noise suppression effect. |
| // Depending on the duration of the inactive signal it takes a |
| // considerable amount of time for the system to learn what is noise and |
| // what is speech. |
| return; |
| } |
| |
| self->blockInd++; // Update the block index only when we process a block. |
| |
| FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); |
| |
| for (i = 0; i < self->magnLen; i++) { |
| signalEnergy += real[i] * real[i] + imag[i] * imag[i]; |
| sumMagn += magn[i]; |
| if (self->blockInd < END_STARTUP_SHORT) { |
| if (i >= kStartBand) { |
| tmpFloat2 = logf((float)i); |
| sum_log_i += tmpFloat2; |
| sum_log_i_square += tmpFloat2 * tmpFloat2; |
| tmpFloat1 = logf(magn[i]); |
| sum_log_magn += tmpFloat1; |
| sum_log_i_log_magn += tmpFloat2 * tmpFloat1; |
| } |
| } |
| } |
| signalEnergy /= self->magnLen; |
| self->signalEnergy = signalEnergy; |
| self->sumMagn = sumMagn; |
| |
| // Quantile noise estimate. |
| NoiseEstimation(self, magn, noise); |
| // Compute simplified noise model during startup. |
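  // The white noise level accumulates the average input magnitude (scaled by
  // the overdrive). The pink noise model assumes magn(i) ~ A / i^exp and
  // obtains A and exp from a least-squares fit of log(magn[i]) against
  // log(i) over the bins i >= kStartBand, accumulated over the startup
  // blocks. The quantile estimate and this parametric estimate are then
  // cross-faded so that the parametric model dominates at the very start and
  // the quantile estimate takes over as blockInd approaches
  // END_STARTUP_SHORT.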
| if (self->blockInd < END_STARTUP_SHORT) { |
| // Estimate White noise. |
| self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive; |
| // Estimate Pink noise parameters. |
| tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand); |
| tmpFloat1 -= (sum_log_i * sum_log_i); |
| tmpFloat2 = |
| (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); |
| tmpFloat3 = tmpFloat2 / tmpFloat1; |
| // Constrain the estimated spectrum to be positive. |
| if (tmpFloat3 < 0.f) { |
| tmpFloat3 = 0.f; |
| } |
| self->pinkNoiseNumerator += tmpFloat3; |
| tmpFloat2 = (sum_log_i * sum_log_magn); |
| tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn; |
| tmpFloat3 = tmpFloat2 / tmpFloat1; |
| // Constrain the pink noise power to be in the interval [0, 1]. |
| if (tmpFloat3 < 0.f) { |
| tmpFloat3 = 0.f; |
| } |
| if (tmpFloat3 > 1.f) { |
| tmpFloat3 = 1.f; |
| } |
| self->pinkNoiseExp += tmpFloat3; |
| |
| // Calculate frequency independent parts of parametric noise estimate. |
| if (self->pinkNoiseExp > 0.f) { |
| // Use pink noise estimate. |
| parametric_num = |
| expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1)); |
| parametric_num *= (float)(self->blockInd + 1); |
| parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1); |
| } |
| for (i = 0; i < self->magnLen; i++) { |
| // Estimate the background noise using the white and pink noise |
| // parameters. |
| if (self->pinkNoiseExp == 0.f) { |
| // Use white noise estimate. |
| self->parametricNoise[i] = self->whiteNoiseLevel; |
| } else { |
| // Use pink noise estimate. |
| float use_band = (float)(i < kStartBand ? kStartBand : i); |
| self->parametricNoise[i] = |
| parametric_num / powf(use_band, parametric_exp); |
| } |
| // Weight quantile noise with modeled noise. |
| noise[i] *= (self->blockInd); |
| tmpFloat2 = |
| self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd); |
| noise[i] += (tmpFloat2 / (float)(self->blockInd + 1)); |
| noise[i] /= END_STARTUP_SHORT; |
| } |
| } |
| // Compute average signal during END_STARTUP_LONG time: |
| // used to normalize spectral difference measure. |
| if (self->blockInd < END_STARTUP_LONG) { |
| self->featureData[5] *= self->blockInd; |
| self->featureData[5] += signalEnergy; |
| self->featureData[5] /= (self->blockInd + 1); |
| } |
| |
| // Post and prior SNR needed for SpeechNoiseProb. |
| ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost); |
| |
| FeatureUpdate(self, magn, updateParsFlag); |
| SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost); |
| UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise); |
| |
| // Keep track of noise spectrum for next frame. |
| memcpy(self->noise, noise, sizeof(*noise) * self->magnLen); |
| memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen); |
| } |
| |
| void WebRtcNs_ProcessCore(NoiseSuppressionC* self, |
| const float* const* speechFrame, |
| size_t num_bands, |
| float* const* outFrame) { |
| // Main routine for noise reduction. |
| int flagHB = 0; |
| size_t i, j; |
| |
| float energy1, energy2, gain, factor, factor1, factor2; |
| float fout[BLOCKL_MAX]; |
| float winData[ANAL_BLOCKL_MAX]; |
| float magn[HALF_ANAL_BLOCKL]; |
| float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; |
| float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; |
| |
| // SWB variables. |
| int deltaBweHB = 1; |
| int deltaGainHB = 1; |
| float decayBweHB = 1.0; |
| float gainMapParHB = 1.0; |
| float gainTimeDomainHB = 1.0; |
| float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; |
| float sumMagnAnalyze, sumMagnProcess; |
| |
  // Check that initialization has been done.
| RTC_DCHECK_EQ(1, self->initFlag); |
| RTC_DCHECK_LE(num_bands - 1, NUM_HIGH_BANDS_MAX); |
| |
| const float* const* speechFrameHB = NULL; |
| float* const* outFrameHB = NULL; |
| size_t num_high_bands = 0; |
| if (num_bands > 1) { |
| speechFrameHB = &speechFrame[1]; |
| outFrameHB = &outFrame[1]; |
| num_high_bands = num_bands - 1; |
| flagHB = 1; |
| // Range for averaging low band quantities for H band gain. |
| deltaBweHB = (int)self->magnLen / 4; |
| deltaGainHB = deltaBweHB; |
| } |
| |
| // Update analysis buffer for L band. |
| UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf); |
| |
| if (flagHB == 1) { |
| // Update analysis buffer for H bands. |
| for (i = 0; i < num_high_bands; ++i) { |
| UpdateBuffer(speechFrameHB[i], |
| self->blockLen, |
| self->anaLen, |
| self->dataBufHB[i]); |
| } |
| } |
| |
| Windowing(self->window, self->dataBuf, self->anaLen, winData); |
| energy1 = Energy(winData, self->anaLen); |
| if (energy1 == 0.0) { |
| // Synthesize the special case of zero input. |
| // Read out fully processed segment. |
| for (i = self->windShift; i < self->blockLen + self->windShift; i++) { |
| fout[i - self->windShift] = self->syntBuf[i]; |
| } |
| // Update synthesis buffer. |
| UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); |
| |
| for (i = 0; i < self->blockLen; ++i) |
| outFrame[0][i] = |
| WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); |
| |
| // For time-domain gain of HB. |
| if (flagHB == 1) { |
| for (i = 0; i < num_high_bands; ++i) { |
| for (j = 0; j < self->blockLen; ++j) { |
| outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, |
| self->dataBufHB[i][j], |
| WEBRTC_SPL_WORD16_MIN); |
| } |
| } |
| } |
| |
| return; |
| } |
| |
| FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); |
| |
| if (self->blockInd < END_STARTUP_SHORT) { |
| for (i = 0; i < self->magnLen; i++) { |
| self->initMagnEst[i] += magn[i]; |
| } |
| } |
| |
| ComputeDdBasedWienerFilter(self, magn, theFilter); |
| |
| for (i = 0; i < self->magnLen; i++) { |
| // Flooring bottom. |
| if (theFilter[i] < self->denoiseBound) { |
| theFilter[i] = self->denoiseBound; |
| } |
| // Flooring top. |
| if (theFilter[i] > 1.f) { |
| theFilter[i] = 1.f; |
| } |
| if (self->blockInd < END_STARTUP_SHORT) { |
| theFilterTmp[i] = |
| (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]); |
| theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f); |
| // Flooring bottom. |
| if (theFilterTmp[i] < self->denoiseBound) { |
| theFilterTmp[i] = self->denoiseBound; |
| } |
| // Flooring top. |
| if (theFilterTmp[i] > 1.f) { |
| theFilterTmp[i] = 1.f; |
| } |
| // Weight the two suppression filters. |
| theFilter[i] *= (self->blockInd); |
| theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd); |
| theFilter[i] += theFilterTmp[i]; |
| theFilter[i] /= (END_STARTUP_SHORT); |
| } |
| |
| self->smooth[i] = theFilter[i]; |
| real[i] *= self->smooth[i]; |
| imag[i] *= self->smooth[i]; |
| } |
| // Keep track of |magn| spectrum for next frame. |
| memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen); |
| memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen); |
| // Back to time domain. |
| IFFT(self, real, imag, self->magnLen, self->anaLen, winData); |
| |
| // Scale factor: only do it after END_STARTUP_LONG time. |
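  // |gain| below is the square root of the output/input energy ratio for
  // this block. Blocks that kept most of their energy (gain > B_LIM) are
  // boosted slightly, capped so that gain * factor1 never exceeds 1; heavily
  // suppressed blocks are attenuated slightly. The two factors are blended
  // using the (frequency-independent) prior speech probability.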
| factor = 1.f; |
| if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) { |
| factor1 = 1.f; |
| factor2 = 1.f; |
| |
| energy2 = Energy(winData, self->anaLen); |
| gain = (float)sqrt(energy2 / (energy1 + 1.f)); |
| |
| // Scaling for new version. |
| if (gain > B_LIM) { |
| factor1 = 1.f + 1.3f * (gain - B_LIM); |
| if (gain * factor1 > 1.f) { |
| factor1 = 1.f / gain; |
| } |
| } |
| if (gain < B_LIM) { |
| // Don't reduce scale too much for pause regions: |
| // attenuation here should be controlled by flooring. |
| if (gain <= self->denoiseBound) { |
| gain = self->denoiseBound; |
| } |
| factor2 = 1.f - 0.3f * (B_LIM - gain); |
| } |
| // Combine both scales with speech/noise prob: |
| // note prior (priorSpeechProb) is not frequency dependent. |
| factor = self->priorSpeechProb * factor1 + |
| (1.f - self->priorSpeechProb) * factor2; |
| } // Out of self->gainmap == 1. |
| |
| Windowing(self->window, winData, self->anaLen, winData); |
| |
| // Synthesis. |
| for (i = 0; i < self->anaLen; i++) { |
| self->syntBuf[i] += factor * winData[i]; |
| } |
| // Read out fully processed segment. |
| for (i = self->windShift; i < self->blockLen + self->windShift; i++) { |
| fout[i - self->windShift] = self->syntBuf[i]; |
| } |
| // Update synthesis buffer. |
| UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); |
| |
| for (i = 0; i < self->blockLen; ++i) |
| outFrame[0][i] = |
| WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); |
| |
| // For time-domain gain of HB. |
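  // A single time-domain gain is derived for the high band(s): the speech
  // probability and the suppression filter gain are averaged over the upper
  // half of the low-band spectrum, the probability is mapped through a tanh
  // and blended with the averaged gain (weighted more towards the gain when
  // speech is likely), and the result is clamped to [denoiseBound, 1] before
  // being applied to the buffered high-band samples.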
| if (flagHB == 1) { |
| // Average speech prob from low band. |
    // Average over the second half (4->8kHz) of the frequency spectrum.
| avgProbSpeechHB = 0.0; |
| for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) { |
| avgProbSpeechHB += self->speechProb[i]; |
| } |
| avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); |
| // If the speech was suppressed by a component between Analyze and |
| // Process, for example the AEC, then it should not be considered speech |
| // for high band suppression purposes. |
| sumMagnAnalyze = 0; |
| sumMagnProcess = 0; |
| for (i = 0; i < self->magnLen; ++i) { |
| sumMagnAnalyze += self->magnPrevAnalyze[i]; |
| sumMagnProcess += self->magnPrevProcess[i]; |
| } |
| avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze; |
| // Average filter gain from low band. |
    // Average over the second half (4->8kHz) of the frequency spectrum.
| avgFilterGainHB = 0.0; |
| for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) { |
| avgFilterGainHB += self->smooth[i]; |
| } |
| avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); |
| avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f; |
| // Gain based on speech probability. |
| gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); |
| // Combine gain with low band gain. |
| gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB; |
| if (avgProbSpeechHB >= 0.5f) { |
| gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB; |
| } |
| gainTimeDomainHB = gainTimeDomainHB * decayBweHB; |
| // Make sure gain is within flooring range. |
| // Flooring bottom. |
| if (gainTimeDomainHB < self->denoiseBound) { |
| gainTimeDomainHB = self->denoiseBound; |
| } |
| // Flooring top. |
| if (gainTimeDomainHB > 1.f) { |
| gainTimeDomainHB = 1.f; |
| } |
| // Apply gain. |
| for (i = 0; i < num_high_bands; ++i) { |
| for (j = 0; j < self->blockLen; j++) { |
| outFrameHB[i][j] = |
| WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, |
| gainTimeDomainHB * self->dataBufHB[i][j], |
| WEBRTC_SPL_WORD16_MIN); |
| } |
| } |
| } // End of H band gain computation. |
| } |