/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <string.h>
#include <math.h>
//#include <stdio.h>
#include <stdlib.h>
#include "noise_suppression.h"
#include "ns_core.h"
#include "windows_private.h"
#include "fft4g.h"
#include "signal_processing_library.h"
|  |  | 
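/*
 * Usage sketch (illustrative only): callers normally go through the public
 * wrappers declared in noise_suppression.h rather than driving the core
 * functions directly, but a minimal direct call sequence would look roughly
 * like this for 16 kHz input (one 10 ms frame = 160 samples):
 *
 *   NSinst_t inst;
 *   short in[160], out[160];
 *   if (WebRtcNs_InitCore(&inst, 16000) == 0) {
 *     WebRtcNs_set_policy_core(&inst, 2);               // moderate suppression
 *     WebRtcNs_ProcessCore(&inst, in, NULL, out, NULL); // HB pointers unused below 32 kHz
 *   }
 */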
|  | // Set Feature Extraction Parameters | 
|  | void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) { | 
|  | //bin size of histogram | 
|  | inst->featureExtractionParams.binSizeLrt      = (float)0.1; | 
|  | inst->featureExtractionParams.binSizeSpecFlat = (float)0.05; | 
|  | inst->featureExtractionParams.binSizeSpecDiff = (float)0.1; | 
|  |  | 
|  | //range of histogram over which lrt threshold is computed | 
|  | inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0; | 
|  |  | 
|  | //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain | 
|  | // thresholds for prior model | 
|  | inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff | 
|  | inst->featureExtractionParams.factor2ModelPars = (float)0.9;  //for spectral_flatness: | 
|  | // used when noise is flatter than speech | 
|  |  | 
|  | //peak limit for spectral flatness (varies between 0 and 1) | 
|  | inst->featureExtractionParams.thresPosSpecFlat = (float)0.6; | 
|  |  | 
|  | //limit on spacing of two highest peaks in histogram: spacing determined by bin size | 
|  | inst->featureExtractionParams.limitPeakSpacingSpecFlat = | 
|  | 2 * inst->featureExtractionParams.binSizeSpecFlat; | 
|  | inst->featureExtractionParams.limitPeakSpacingSpecDiff = | 
|  | 2 * inst->featureExtractionParams.binSizeSpecDiff; | 
|  |  | 
|  | //limit on relevance of second peak: | 
|  | inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5; | 
|  | inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5; | 
|  |  | 
|  | // fluctuation limit of lrt feature | 
|  | inst->featureExtractionParams.thresFluctLrt = (float)0.05; | 
|  |  | 
|  | //limit on the max and min values for the feature thresholds | 
|  | inst->featureExtractionParams.maxLrt = (float)1.0; | 
|  | inst->featureExtractionParams.minLrt = (float)0.20; | 
|  |  | 
|  | inst->featureExtractionParams.maxSpecFlat = (float)0.95; | 
|  | inst->featureExtractionParams.minSpecFlat = (float)0.10; | 
|  |  | 
|  | inst->featureExtractionParams.maxSpecDiff = (float)1.0; | 
|  | inst->featureExtractionParams.minSpecDiff = (float)0.16; | 
|  |  | 
// Criterion on the weight of the histogram peak to accept/reject a feature.
inst->featureExtractionParams.thresWeightSpecFlat =
    (int)(0.3 * (inst->modelUpdatePars[1])); // For spectral flatness.
inst->featureExtractionParams.thresWeightSpecDiff =
    (int)(0.3 * (inst->modelUpdatePars[1])); // For spectral difference.
|  | } | 
|  |  | 
|  | // Initialize state | 
|  | int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) { | 
|  | int i; | 
|  | //We only support 10ms frames | 
|  |  | 
|  | //check for valid pointer | 
|  | if (inst == NULL) { | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | // Initialization of struct | 
|  | if (fs == 8000 || fs == 16000 || fs == 32000) { | 
|  | inst->fs = fs; | 
|  | } else { | 
|  | return -1; | 
|  | } | 
|  | inst->windShift = 0; | 
|  | if (fs == 8000) { | 
|  | // We only support 10ms frames | 
|  | inst->blockLen = 80; | 
|  | inst->blockLen10ms = 80; | 
|  | inst->anaLen = 128; | 
|  | inst->window = kBlocks80w128; | 
|  | inst->outLen = 0; | 
|  | } else if (fs == 16000) { | 
|  | // We only support 10ms frames | 
|  | inst->blockLen = 160; | 
|  | inst->blockLen10ms = 160; | 
|  | inst->anaLen = 256; | 
|  | inst->window = kBlocks160w256; | 
|  | inst->outLen = 0; | 
|  | } else if (fs == 32000) { | 
|  | // We only support 10ms frames | 
|  | inst->blockLen = 160; | 
|  | inst->blockLen10ms = 160; | 
|  | inst->anaLen = 256; | 
|  | inst->window = kBlocks160w256; | 
|  | inst->outLen = 0; | 
|  | } | 
|  | inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins | 
|  |  | 
|  | // Initialize fft work arrays. | 
|  | inst->ip[0] = 0; // Setting this triggers initialization. | 
|  | memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); | 
|  | WebRtc_rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft); | 
|  |  | 
|  | memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); | 
|  | memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); | 
|  |  | 
|  | //for HB processing | 
|  | memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX); | 
|  |  | 
|  | //for quantile noise estimation | 
|  | memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); | 
|  | for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { | 
|  | inst->lquantile[i] = (float)8.0; | 
|  | inst->density[i] = (float)0.3; | 
|  | } | 
|  |  | 
|  | for (i = 0; i < SIMULT; i++) { | 
|  | inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); | 
|  | } | 
|  |  | 
|  | inst->updates = 0; | 
|  |  | 
|  | // Wiener filter initialization | 
|  | for (i = 0; i < HALF_ANAL_BLOCKL; i++) { | 
|  | inst->smooth[i] = (float)1.0; | 
|  | } | 
|  |  | 
|  | // Set the aggressiveness: default | 
|  | inst->aggrMode = 0; | 
|  |  | 
|  | //initialize variables for new method | 
|  | inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise | 
|  | for (i = 0; i < HALF_ANAL_BLOCKL; i++) { | 
|  | inst->magnPrev[i]      = (float)0.0; //previous mag spectrum | 
|  | inst->noisePrev[i]     = (float)0.0; //previous noise-spectrum | 
|  | inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold) | 
|  | inst->magnAvgPause[i]  = (float)0.0; //conservative noise spectrum estimate | 
|  | inst->speechProbHB[i]  = (float)0.0; //for estimation of HB in second pass | 
|  | inst->initMagnEst[i]   = (float)0.0; //initial average mag spectrum | 
|  | } | 
|  |  | 
|  | //feature quantities | 
|  | inst->featureData[0] = SF_FEATURE_THR;  //spectral flatness (start on threshold) | 
|  | inst->featureData[1] = (float)0.0;      //spectral entropy: not used in this version | 
|  | inst->featureData[2] = (float)0.0;      //spectral variance: not used in this version | 
|  | inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold) | 
|  | inst->featureData[4] = SF_FEATURE_THR;  //spectral template diff (start on threshold) | 
|  | inst->featureData[5] = (float)0.0;      //normalization for spectral-diff | 
|  | inst->featureData[6] = (float)0.0;      //window time-average of input magnitude spectrum | 
|  |  | 
|  | //histogram quantities: used to estimate/update thresholds for features | 
|  | for (i = 0; i < HIST_PAR_EST; i++) { | 
|  | inst->histLrt[i] = 0; | 
|  | inst->histSpecFlat[i] = 0; | 
|  | inst->histSpecDiff[i] = 0; | 
|  | } | 
|  |  | 
|  | inst->blockInd = -1; //frame counter | 
|  | inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature | 
|  | inst->priorModelPars[1] = (float)0.5;      //threshold for spectral flatness: | 
|  | // determined on-line | 
|  | inst->priorModelPars[2] = (float)1.0;      //sgn_map par for spectral measure: | 
|  | // 1 for flatness measure | 
|  | inst->priorModelPars[3] = (float)0.5;      //threshold for template-difference feature: | 
|  | // determined on-line | 
|  | inst->priorModelPars[4] = (float)1.0;      //default weighting parameter for lrt feature | 
|  | inst->priorModelPars[5] = (float)0.0;      //default weighting parameter for | 
|  | // spectral flatness feature | 
|  | inst->priorModelPars[6] = (float)0.0;      //default weighting parameter for | 
|  | // spectral difference feature | 
|  |  | 
inst->modelUpdatePars[0] = 2;   // Update flag for parameters:
                                // 0 = no update, 1 = update once, 2 = update every window.
inst->modelUpdatePars[1] = 500; // Window length (in blocks) for the update.
inst->modelUpdatePars[2] = 0;   // Counter for update of conservative noise spectrum.
// Countdown (from modelUpdatePars[1]) until the feature thresholds are updated.
inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
|  |  | 
|  | inst->signalEnergy = 0.0; | 
|  | inst->sumMagn = 0.0; | 
|  | inst->whiteNoiseLevel = 0.0; | 
|  | inst->pinkNoiseNumerator = 0.0; | 
|  | inst->pinkNoiseExp = 0.0; | 
|  |  | 
|  | WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration | 
|  |  | 
|  | //default mode | 
|  | WebRtcNs_set_policy_core(inst, 0); | 
|  |  | 
|  |  | 
|  | memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX); | 
|  |  | 
|  | inst->initFlag = 1; | 
|  | return 0; | 
|  | } | 
|  |  | 
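// The mode selects the suppression aggressiveness. In rough terms, based on
// how the fields are used in WebRtcNs_ProcessCore below: 'overdrive' is the
// over-subtraction factor applied to the noise estimate and the Wiener gain,
// 'denoiseBound' is the lower bound on the suppression gain (about -6, -12,
// -18 and -21 dB for modes 0..3, since 20*log10(0.5) is roughly -6 dB, and
// so on), and 'gainmap' enables the energy-based output scaling applied
// during synthesis.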
|  | int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) { | 
// Allow for modes 0, 1, 2, 3.
|  | if (mode < 0 || mode > 3) { | 
|  | return (-1); | 
|  | } | 
|  |  | 
|  | inst->aggrMode = mode; | 
|  | if (mode == 0) { | 
|  | inst->overdrive = (float)1.0; | 
|  | inst->denoiseBound = (float)0.5; | 
|  | inst->gainmap = 0; | 
|  | } else if (mode == 1) { | 
|  | //inst->overdrive = (float)1.25; | 
|  | inst->overdrive = (float)1.0; | 
|  | inst->denoiseBound = (float)0.25; | 
|  | inst->gainmap = 1; | 
|  | } else if (mode == 2) { | 
|  | //inst->overdrive = (float)1.25; | 
|  | inst->overdrive = (float)1.1; | 
|  | inst->denoiseBound = (float)0.125; | 
|  | inst->gainmap = 1; | 
|  | } else if (mode == 3) { | 
|  | //inst->overdrive = (float)1.30; | 
|  | inst->overdrive = (float)1.25; | 
|  | inst->denoiseBound = (float)0.09; | 
|  | inst->gainmap = 1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // Estimate noise | 
|  | void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) { | 
|  | int i, s, offset; | 
|  | float lmagn[HALF_ANAL_BLOCKL], delta; | 
|  |  | 
|  | if (inst->updates < END_STARTUP_LONG) { | 
|  | inst->updates++; | 
|  | } | 
|  |  | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | lmagn[i] = (float)log(magn[i]); | 
|  | } | 
|  |  | 
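// Quantile-based noise estimation: for each bin a running quantile of the
// log magnitude ('lquantile') is nudged up by QUANTILE * delta / (counter + 1)
// when the current value lies above it, and down by (1 - QUANTILE) * delta /
// (counter + 1) otherwise, where delta shrinks as the local density estimate
// grows. SIMULT estimators run in parallel with staggered counters, so one of
// them completes its window every END_STARTUP_LONG / SIMULT frames and the
// exported 'quantile' stays adaptive without a hard reset.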
|  | // loop over simultaneous estimates | 
|  | for (s = 0; s < SIMULT; s++) { | 
|  | offset = s * inst->magnLen; | 
|  |  | 
|  | // newquantest(...) | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | // compute delta | 
|  | if (inst->density[offset + i] > 1.0) { | 
|  | delta = FACTOR * (float)1.0 / inst->density[offset + i]; | 
|  | } else { | 
|  | delta = FACTOR; | 
|  | } | 
|  |  | 
|  | // update log quantile estimate | 
|  | if (lmagn[i] > inst->lquantile[offset + i]) { | 
|  | inst->lquantile[offset + i] += QUANTILE * delta | 
|  | / (float)(inst->counter[s] + 1); | 
|  | } else { | 
|  | inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta | 
|  | / (float)(inst->counter[s] + 1); | 
|  | } | 
|  |  | 
|  | // update density estimate | 
|  | if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) { | 
|  | inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset | 
|  | + i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1); | 
|  | } | 
|  | } // end loop over magnitude spectrum | 
|  |  | 
|  | if (inst->counter[s] >= END_STARTUP_LONG) { | 
|  | inst->counter[s] = 0; | 
|  | if (inst->updates >= END_STARTUP_LONG) { | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | inst->counter[s]++; | 
|  | } // end loop over simultaneous estimates | 
|  |  | 
|  | // Sequentially update the noise during startup | 
|  | if (inst->updates < END_STARTUP_LONG) { | 
// Use the last "s" from the loop above to get a startup noise estimate that differs from zero.
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); | 
|  | } | 
|  | } | 
|  |  | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | noise[i] = inst->quantile[i]; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Extract thresholds for feature parameters | 
|  | // histograms are computed over some window_size (given by inst->modelUpdatePars[1]) | 
|  | // thresholds and weights are extracted every window | 
|  | // flag 0 means update histogram only, flag 1 means compute the thresholds/weights | 
|  | // threshold and weights are returned in: inst->priorModelPars | 
|  | void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) { | 
|  | int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; | 
|  | int maxPeak1, maxPeak2; | 
|  | int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff; | 
|  |  | 
|  | float binMid, featureSum; | 
|  | float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; | 
|  | float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; | 
|  |  | 
|  | //3 features: lrt, flatness, difference | 
|  | //lrt_feature = inst->featureData[3]; | 
|  | //flat_feature = inst->featureData[0]; | 
|  | //diff_feature = inst->featureData[4]; | 
|  |  | 
|  | //update histograms | 
|  | if (flag == 0) { | 
|  | // LRT | 
|  | if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) | 
|  | && (inst->featureData[3] >= 0.0)) { | 
|  | i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt); | 
|  | inst->histLrt[i]++; | 
|  | } | 
|  | // Spectral flatness | 
|  | if ((inst->featureData[0] < HIST_PAR_EST | 
|  | * inst->featureExtractionParams.binSizeSpecFlat) | 
|  | && (inst->featureData[0] >= 0.0)) { | 
|  | i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat); | 
|  | inst->histSpecFlat[i]++; | 
|  | } | 
|  | // Spectral difference | 
|  | if ((inst->featureData[4] < HIST_PAR_EST | 
|  | * inst->featureExtractionParams.binSizeSpecDiff) | 
|  | && (inst->featureData[4] >= 0.0)) { | 
|  | i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff); | 
|  | inst->histSpecDiff[i]++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // extract parameters for speech/noise probability | 
|  | if (flag == 1) { | 
|  | //lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt | 
|  | avgHistLrt = 0.0; | 
|  | avgHistLrtCompl = 0.0; | 
|  | avgSquareHistLrt = 0.0; | 
|  | numHistLrt = 0; | 
|  | for (i = 0; i < HIST_PAR_EST; i++) { | 
|  | binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt; | 
|  | if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) { | 
|  | avgHistLrt += inst->histLrt[i] * binMid; | 
|  | numHistLrt += inst->histLrt[i]; | 
|  | } | 
|  | avgSquareHistLrt += inst->histLrt[i] * binMid * binMid; | 
|  | avgHistLrtCompl += inst->histLrt[i] * binMid; | 
|  | } | 
|  | if (numHistLrt > 0) { | 
|  | avgHistLrt = avgHistLrt / ((float)numHistLrt); | 
|  | } | 
|  | avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]); | 
|  | avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]); | 
|  | fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; | 
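// fluctLrt is a variance-like spread measure of the LRT histogram
// (E[x^2] - E[x restricted to the averaging range] * E[x]); a very small
// value means the LRT feature barely moves, which is treated as a
// noise-only state below.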
|  | // get threshold for lrt feature: | 
|  | if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { | 
|  | //very low fluct, so likely noise | 
|  | inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; | 
|  | } else { | 
|  | inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars | 
|  | * avgHistLrt; | 
|  | // check if value is within min/max range | 
|  | if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) { | 
|  | inst->priorModelPars[0] = inst->featureExtractionParams.minLrt; | 
|  | } | 
|  | if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) { | 
|  | inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; | 
|  | } | 
|  | } | 
|  | // done with lrt feature | 
|  |  | 
|  | // | 
|  | // for spectral flatness and spectral difference: compute the main peaks of histogram | 
|  | maxPeak1 = 0; | 
|  | maxPeak2 = 0; | 
|  | posPeak1SpecFlat = 0.0; | 
|  | posPeak2SpecFlat = 0.0; | 
|  | weightPeak1SpecFlat = 0; | 
|  | weightPeak2SpecFlat = 0; | 
|  |  | 
|  | // peaks for flatness | 
|  | for (i = 0; i < HIST_PAR_EST; i++) { | 
|  | binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat; | 
|  | if (inst->histSpecFlat[i] > maxPeak1) { | 
|  | // Found new "first" peak | 
|  | maxPeak2 = maxPeak1; | 
|  | weightPeak2SpecFlat = weightPeak1SpecFlat; | 
|  | posPeak2SpecFlat = posPeak1SpecFlat; | 
|  |  | 
|  | maxPeak1 = inst->histSpecFlat[i]; | 
|  | weightPeak1SpecFlat = inst->histSpecFlat[i]; | 
|  | posPeak1SpecFlat = binMid; | 
|  | } else if (inst->histSpecFlat[i] > maxPeak2) { | 
|  | // Found new "second" peak | 
|  | maxPeak2 = inst->histSpecFlat[i]; | 
|  | weightPeak2SpecFlat = inst->histSpecFlat[i]; | 
|  | posPeak2SpecFlat = binMid; | 
|  | } | 
|  | } | 
|  |  | 
|  | //compute two peaks for spectral difference | 
|  | maxPeak1 = 0; | 
|  | maxPeak2 = 0; | 
|  | posPeak1SpecDiff = 0.0; | 
|  | posPeak2SpecDiff = 0.0; | 
|  | weightPeak1SpecDiff = 0; | 
|  | weightPeak2SpecDiff = 0; | 
|  | // peaks for spectral difference | 
|  | for (i = 0; i < HIST_PAR_EST; i++) { | 
|  | binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff; | 
|  | if (inst->histSpecDiff[i] > maxPeak1) { | 
|  | // Found new "first" peak | 
|  | maxPeak2 = maxPeak1; | 
|  | weightPeak2SpecDiff = weightPeak1SpecDiff; | 
|  | posPeak2SpecDiff = posPeak1SpecDiff; | 
|  |  | 
|  | maxPeak1 = inst->histSpecDiff[i]; | 
|  | weightPeak1SpecDiff = inst->histSpecDiff[i]; | 
|  | posPeak1SpecDiff = binMid; | 
|  | } else if (inst->histSpecDiff[i] > maxPeak2) { | 
|  | // Found new "second" peak | 
|  | maxPeak2 = inst->histSpecDiff[i]; | 
|  | weightPeak2SpecDiff = inst->histSpecDiff[i]; | 
|  | posPeak2SpecDiff = binMid; | 
|  | } | 
|  | } | 
|  |  | 
// for spectral flatness feature
|  | useFeatureSpecFlat = 1; | 
|  | // merge the two peaks if they are close | 
|  | if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) | 
|  | < inst->featureExtractionParams.limitPeakSpacingSpecFlat) | 
|  | && (weightPeak2SpecFlat | 
|  | > inst->featureExtractionParams.limitPeakWeightsSpecFlat | 
|  | * weightPeak1SpecFlat)) { | 
|  | weightPeak1SpecFlat += weightPeak2SpecFlat; | 
|  | posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat); | 
|  | } | 
|  | //reject if weight of peaks is not large enough, or peak value too small | 
|  | if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat | 
|  | || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) { | 
|  | useFeatureSpecFlat = 0; | 
|  | } | 
|  | // if selected, get the threshold | 
|  | if (useFeatureSpecFlat == 1) { | 
|  | // compute the threshold | 
|  | inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars | 
|  | * posPeak1SpecFlat; | 
|  | //check if value is within min/max range | 
|  | if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) { | 
|  | inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat; | 
|  | } | 
|  | if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) { | 
|  | inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat; | 
|  | } | 
|  | } | 
|  | // done with flatness feature | 
|  |  | 
|  | // for template feature | 
|  | useFeatureSpecDiff = 1; | 
|  | // merge the two peaks if they are close | 
|  | if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) | 
|  | < inst->featureExtractionParams.limitPeakSpacingSpecDiff) | 
|  | && (weightPeak2SpecDiff | 
|  | > inst->featureExtractionParams.limitPeakWeightsSpecDiff | 
|  | * weightPeak1SpecDiff)) { | 
|  | weightPeak1SpecDiff += weightPeak2SpecDiff; | 
|  | posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff); | 
|  | } | 
|  | // get the threshold value | 
|  | inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars | 
|  | * posPeak1SpecDiff; | 
|  | //reject if weight of peaks is not large enough | 
|  | if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) { | 
|  | useFeatureSpecDiff = 0; | 
|  | } | 
|  | //check if value is within min/max range | 
|  | if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) { | 
|  | inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff; | 
|  | } | 
|  | if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) { | 
|  | inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff; | 
|  | } | 
|  | // done with spectral difference feature | 
|  |  | 
|  | // don't use template feature if fluctuation of lrt feature is very low: | 
|  | //  most likely just noise state | 
|  | if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { | 
|  | useFeatureSpecDiff = 0; | 
|  | } | 
|  |  | 
|  | // select the weights between the features | 
|  | // inst->priorModelPars[4] is weight for lrt: always selected | 
|  | // inst->priorModelPars[5] is weight for spectral flatness | 
|  | // inst->priorModelPars[6] is weight for spectral difference | 
|  | featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); | 
|  | inst->priorModelPars[4] = (float)1.0 / featureSum; | 
|  | inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; | 
|  | inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; | 
|  |  | 
|  | // set hists to zero for next update | 
|  | if (inst->modelUpdatePars[0] >= 1) { | 
|  | for (i = 0; i < HIST_PAR_EST; i++) { | 
|  | inst->histLrt[i] = 0; | 
|  | inst->histSpecFlat[i] = 0; | 
|  | inst->histSpecDiff[i] = 0; | 
|  | } | 
|  | } | 
|  | } // end of flag == 1 | 
|  | } | 
|  |  | 
|  | // Compute spectral flatness on input spectrum | 
|  | // magnIn is the magnitude spectrum | 
|  | // spectral flatness is returned in inst->featureData[0] | 
|  | void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) { | 
|  | int i; | 
|  | int shiftLP = 1; //option to remove first bin(s) from spectral measures | 
|  | float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; | 
|  |  | 
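// Spectral flatness here is the ratio of the geometric mean to the arithmetic
// mean of the magnitude spectrum (the first 'shiftLP' bins excluded). It lies
// in (0, 1]: close to 1 for a flat, noise-like spectrum and close to 0 for a
// peaky, voiced-speech-like spectrum.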
// compute spectral measures
|  | // for flatness | 
|  | avgSpectralFlatnessNum = 0.0; | 
|  | avgSpectralFlatnessDen = inst->sumMagn; | 
|  | for (i = 0; i < shiftLP; i++) { | 
|  | avgSpectralFlatnessDen -= magnIn[i]; | 
|  | } | 
|  | // compute log of ratio of the geometric to arithmetic mean: check for log(0) case | 
|  | for (i = shiftLP; i < inst->magnLen; i++) { | 
|  | if (magnIn[i] > 0.0) { | 
|  | avgSpectralFlatnessNum += (float)log(magnIn[i]); | 
|  | } else { | 
|  | inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0]; | 
|  | return; | 
|  | } | 
|  | } | 
|  | //normalize | 
|  | avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen; | 
|  | avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen; | 
|  |  | 
|  | //ratio and inverse log: check for case of log(0) | 
|  | spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; | 
|  |  | 
|  | //time-avg update of spectral flatness feature | 
|  | inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]); | 
|  | // done with flatness feature | 
|  | } | 
|  |  | 
|  | // Compute the difference measure between input spectrum and a template/learned noise spectrum | 
|  | // magnIn is the input spectrum | 
|  | // the reference/template spectrum is inst->magnAvgPause[i] | 
|  | // returns (normalized) spectral difference in inst->featureData[4] | 
|  | void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) { | 
|  | // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) | 
|  | int i; | 
|  | float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; | 
|  |  | 
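// Spectral difference: the residual variance of magnIn after removing the
// part that is linearly predictable from the conservative noise template
// magnAvgPause; it is small when the input resembles the learned noise
// spectrum and larger for speech-like frames. featureData[5] (the average
// signal energy) provides the normalization.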
|  | avgPause = 0.0; | 
|  | avgMagn = inst->sumMagn; | 
|  | // compute average quantities | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | //conservative smooth noise spectrum from pause frames | 
|  | avgPause += inst->magnAvgPause[i]; | 
|  | } | 
|  | avgPause = avgPause / ((float)inst->magnLen); | 
|  | avgMagn = avgMagn / ((float)inst->magnLen); | 
|  |  | 
|  | covMagnPause = 0.0; | 
|  | varPause = 0.0; | 
|  | varMagn = 0.0; | 
|  | // compute variance and covariance quantities | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause); | 
|  | varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause); | 
|  | varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); | 
|  | } | 
|  | covMagnPause = covMagnPause / ((float)inst->magnLen); | 
|  | varPause = varPause / ((float)inst->magnLen); | 
|  | varMagn = varMagn / ((float)inst->magnLen); | 
|  | // update of average magnitude spectrum | 
|  | inst->featureData[6] += inst->signalEnergy; | 
|  |  | 
|  | avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001); | 
|  | // normalize and compute time-avg update of difference feature | 
|  | avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001)); | 
|  | inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]); | 
|  | } | 
|  |  | 
|  | // Compute speech/noise probability | 
|  | // speech/noise probability is returned in: probSpeechFinal | 
|  | //magn is the input magnitude spectrum | 
|  | //noise is the noise spectrum | 
|  | //snrLocPrior is the prior snr for each freq. | 
//snrLocPost is the post snr for each freq.
|  | void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior, | 
|  | float* snrLocPost) { | 
|  | int i, sgnMap; | 
|  | float invLrt, gainPrior, indPrior; | 
|  | float logLrtTimeAvgKsum, besselTmp; | 
|  | float indicator0, indicator1, indicator2; | 
|  | float tmpFloat1, tmpFloat2; | 
|  | float weightIndPrior0, weightIndPrior1, weightIndPrior2; | 
|  | float threshPrior0, threshPrior1, threshPrior2; | 
|  | float widthPrior, widthPrior0, widthPrior1, widthPrior2; | 
|  |  | 
|  | widthPrior0 = WIDTH_PR_MAP; | 
|  | widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region: | 
|  | // lower range, so increase width in tanh map | 
|  | widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure | 
|  |  | 
|  | //threshold parameters for features | 
|  | threshPrior0 = inst->priorModelPars[0]; | 
|  | threshPrior1 = inst->priorModelPars[1]; | 
|  | threshPrior2 = inst->priorModelPars[3]; | 
|  |  | 
|  | //sign for flatness feature | 
|  | sgnMap = (int)(inst->priorModelPars[2]); | 
|  |  | 
|  | //weight parameters for features | 
|  | weightIndPrior0 = inst->priorModelPars[4]; | 
|  | weightIndPrior1 = inst->priorModelPars[5]; | 
|  | weightIndPrior2 = inst->priorModelPars[6]; | 
|  |  | 
|  | // compute feature based on average LR factor | 
|  | // this is the average over all frequencies of the smooth log lrt | 
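// Per bin this accumulates a smoothed log likelihood-ratio statistic,
//   (post + 1) * 2 * prior / (1 + 2 * prior) - log(1 + 2 * prior),
// with prior = snrLocPrior[i] and post = snrLocPost[i] (in the spirit of a
// Gaussian likelihood-ratio VAD). The frame-level LRT feature is the average
// of this quantity over all frequency bins.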
|  | logLrtTimeAvgKsum = 0.0; | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i]; | 
|  | tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001); | 
|  | besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2; | 
|  | inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1) | 
|  | - inst->logLrtTimeAvg[i]); | 
|  | logLrtTimeAvgKsum += inst->logLrtTimeAvg[i]; | 
|  | } | 
|  | logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen); | 
|  | inst->featureData[3] = logLrtTimeAvgKsum; | 
|  | // done with computation of LR factor | 
|  |  | 
|  | // | 
|  | //compute the indicator functions | 
|  | // | 
|  |  | 
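// Each feature is mapped to (0, 1) through a soft threshold,
//   indicator = 0.5 * (tanh(width * (feature - threshold)) + 1).
// The width is doubled in the pause-like region so the mapping is less
// abrupt there, and sgnMap flips the direction for the flatness feature.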
|  | // average lrt feature | 
|  | widthPrior = widthPrior0; | 
|  | //use larger width in tanh map for pause regions | 
|  | if (logLrtTimeAvgKsum < threshPrior0) { | 
|  | widthPrior = widthPrior1; | 
|  | } | 
|  | // compute indicator function: sigmoid map | 
|  | indicator0 = (float)0.5 * ((float)tanh(widthPrior * | 
|  | (logLrtTimeAvgKsum - threshPrior0)) + (float)1.0); | 
|  |  | 
|  | //spectral flatness feature | 
|  | tmpFloat1 = inst->featureData[0]; | 
|  | widthPrior = widthPrior0; | 
|  | //use larger width in tanh map for pause regions | 
|  | if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { | 
|  | widthPrior = widthPrior1; | 
|  | } | 
|  | if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { | 
|  | widthPrior = widthPrior1; | 
|  | } | 
|  | // compute indicator function: sigmoid map | 
|  | indicator1 = (float)0.5 * ((float)tanh((float)sgnMap * | 
|  | widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0); | 
|  |  | 
|  | //for template spectrum-difference | 
|  | tmpFloat1 = inst->featureData[4]; | 
|  | widthPrior = widthPrior0; | 
|  | //use larger width in tanh map for pause regions | 
|  | if (tmpFloat1 < threshPrior2) { | 
|  | widthPrior = widthPrior2; | 
|  | } | 
|  | // compute indicator function: sigmoid map | 
|  | indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) | 
|  | + (float)1.0); | 
|  |  | 
|  | //combine the indicator function with the feature weights | 
|  | indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 | 
|  | * indicator2; | 
|  | // done with computing indicator function | 
|  |  | 
|  | //compute the prior probability | 
|  | inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb); | 
|  | // make sure probabilities are within range: keep floor to 0.01 | 
|  | if (inst->priorSpeechProb > 1.0) { | 
|  | inst->priorSpeechProb = (float)1.0; | 
|  | } | 
|  | if (inst->priorSpeechProb < 0.01) { | 
|  | inst->priorSpeechProb = (float)0.01; | 
|  | } | 
|  |  | 
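// The final per-bin speech probability combines the prior model with the
// per-bin LRT:
//   P(speech | bin i) = 1 / (1 + (1 - q) / q * exp(-logLrtTimeAvg[i])),
// where q = priorSpeechProb (the 0.0001 below only guards the division).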
|  | //final speech probability: combine prior model with LR factor: | 
|  | gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001); | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | invLrt = (float)exp(-inst->logLrtTimeAvg[i]); | 
|  | invLrt = (float)gainPrior * invLrt; | 
|  | probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt); | 
|  | } | 
|  | } | 
|  |  | 
|  | int WebRtcNs_ProcessCore(NSinst_t* inst, | 
|  | short* speechFrame, | 
|  | short* speechFrameHB, | 
|  | short* outFrame, | 
|  | short* outFrameHB) { | 
|  | // main routine for noise reduction | 
|  |  | 
|  | int     flagHB = 0; | 
|  | int     i; | 
|  | const int kStartBand = 5; // Skip first frequency bins during estimation. | 
|  | int     updateParsFlag; | 
|  |  | 
|  | float   energy1, energy2, gain, factor, factor1, factor2; | 
|  | float   signalEnergy, sumMagn; | 
|  | float   snrPrior, currentEstimateStsa; | 
|  | float   tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech; | 
|  | float   gammaNoiseTmp, gammaNoiseOld; | 
|  | float   noiseUpdateTmp, fTmp, dTmp; | 
|  | float   fin[BLOCKL_MAX], fout[BLOCKL_MAX]; | 
|  | float   winData[ANAL_BLOCKL_MAX]; | 
|  | float   magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; | 
|  | float   theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; | 
|  | float   snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; | 
|  | float   probSpeechFinal[HALF_ANAL_BLOCKL] = { 0 }; | 
|  | float   previousEstimateStsa[HALF_ANAL_BLOCKL]; | 
|  | float   real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; | 
|  | // Variables during startup | 
|  | float   sum_log_i = 0.0; | 
|  | float   sum_log_i_square = 0.0; | 
|  | float   sum_log_magn = 0.0; | 
|  | float   sum_log_i_log_magn = 0.0; | 
|  | float   parametric_noise = 0.0; | 
|  | float   parametric_exp = 0.0; | 
|  | float   parametric_num = 0.0; | 
|  |  | 
|  | // SWB variables | 
|  | int     deltaBweHB = 1; | 
|  | int     deltaGainHB = 1; | 
|  | float   decayBweHB = 1.0; | 
|  | float   gainMapParHB = 1.0; | 
|  | float   gainTimeDomainHB = 1.0; | 
|  | float   avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; | 
|  |  | 
// Check that initialization has been done
|  | if (inst->initFlag != 1) { | 
|  | return (-1); | 
|  | } | 
|  | // Check for valid pointers based on sampling rate | 
|  | if (inst->fs == 32000) { | 
|  | if (speechFrameHB == NULL) { | 
|  | return -1; | 
|  | } | 
|  | flagHB = 1; | 
|  | // range for averaging low band quantities for H band gain | 
|  | deltaBweHB = (int)inst->magnLen / 4; | 
|  | deltaGainHB = deltaBweHB; | 
|  | } | 
|  | // | 
|  | updateParsFlag = inst->modelUpdatePars[0]; | 
|  | // | 
|  |  | 
|  | //for LB do all processing | 
|  | // convert to float | 
|  | for (i = 0; i < inst->blockLen10ms; i++) { | 
|  | fin[i] = (float)speechFrame[i]; | 
|  | } | 
|  | // update analysis buffer for L band | 
|  | memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms, | 
|  | sizeof(float) * (inst->anaLen - inst->blockLen10ms)); | 
|  | memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin, | 
|  | sizeof(float) * inst->blockLen10ms); | 
|  |  | 
|  | if (flagHB == 1) { | 
|  | // convert to float | 
|  | for (i = 0; i < inst->blockLen10ms; i++) { | 
|  | fin[i] = (float)speechFrameHB[i]; | 
|  | } | 
|  | // update analysis buffer for H band | 
|  | memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms, | 
|  | sizeof(float) * (inst->anaLen - inst->blockLen10ms)); | 
|  | memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin, | 
|  | sizeof(float) * inst->blockLen10ms); | 
|  | } | 
|  |  | 
|  | // check if processing needed | 
|  | if (inst->outLen == 0) { | 
|  | // windowing | 
|  | energy1 = 0.0; | 
|  | for (i = 0; i < inst->anaLen; i++) { | 
|  | winData[i] = inst->window[i] * inst->dataBuf[i]; | 
|  | energy1 += winData[i] * winData[i]; | 
|  | } | 
|  | if (energy1 == 0.0) { | 
|  | // synthesize the special case of zero input | 
|  | // we want to avoid updating statistics in this case: | 
|  | // Updating feature statistics when we have zeros only will cause thresholds to | 
|  | // move towards zero signal situations. This in turn has the effect that once the | 
|  | // signal is "turned on" (non-zero values) everything will be treated as speech | 
|  | // and there is no noise suppression effect. Depending on the duration of the | 
|  | // inactive signal it takes a considerable amount of time for the system to learn | 
|  | // what is noise and what is speech. | 
|  |  | 
|  | // read out fully processed segment | 
|  | for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { | 
|  | fout[i - inst->windShift] = inst->syntBuf[i]; | 
|  | } | 
|  | // update synthesis buffer | 
|  | memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, | 
|  | sizeof(float) * (inst->anaLen - inst->blockLen)); | 
|  | memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, | 
|  | sizeof(float) * inst->blockLen); | 
|  |  | 
|  | // out buffer | 
|  | inst->outLen = inst->blockLen - inst->blockLen10ms; | 
|  | if (inst->blockLen > inst->blockLen10ms) { | 
|  | for (i = 0; i < inst->outLen; i++) { | 
|  | inst->outBuf[i] = fout[i + inst->blockLen10ms]; | 
|  | } | 
|  | } | 
|  | // convert to short | 
|  | for (i = 0; i < inst->blockLen10ms; i++) { | 
|  | dTmp = fout[i]; | 
|  | if (dTmp < WEBRTC_SPL_WORD16_MIN) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MIN; | 
|  | } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MAX; | 
|  | } | 
|  | outFrame[i] = (short)dTmp; | 
|  | } | 
|  |  | 
|  | // for time-domain gain of HB | 
|  | if (flagHB == 1) { | 
|  | for (i = 0; i < inst->blockLen10ms; i++) { | 
|  | dTmp = inst->dataBufHB[i]; | 
|  | if (dTmp < WEBRTC_SPL_WORD16_MIN) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MIN; | 
|  | } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MAX; | 
|  | } | 
|  | outFrameHB[i] = (short)dTmp; | 
|  | } | 
|  | } // end of H band gain computation | 
|  | // | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | // | 
|  | inst->blockInd++; // Update the block index only when we process a block. | 
|  | // FFT | 
|  | WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft); | 
|  |  | 
|  | imag[0] = 0; | 
|  | real[0] = winData[0]; | 
|  | magn[0] = (float)(fabs(real[0]) + 1.0f); | 
|  | imag[inst->magnLen - 1] = 0; | 
|  | real[inst->magnLen - 1] = winData[1]; | 
|  | magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f); | 
|  | signalEnergy = (float)(real[0] * real[0]) + | 
|  | (float)(real[inst->magnLen - 1] * real[inst->magnLen - 1]); | 
|  | sumMagn = magn[0] + magn[inst->magnLen - 1]; | 
|  | if (inst->blockInd < END_STARTUP_SHORT) { | 
|  | inst->initMagnEst[0] += magn[0]; | 
|  | inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1]; | 
|  | tmpFloat2 = log((float)(inst->magnLen - 1)); | 
|  | sum_log_i = tmpFloat2; | 
|  | sum_log_i_square = tmpFloat2 * tmpFloat2; | 
|  | tmpFloat1 = log(magn[inst->magnLen - 1]); | 
|  | sum_log_magn = tmpFloat1; | 
|  | sum_log_i_log_magn = tmpFloat2 * tmpFloat1; | 
|  | } | 
|  | for (i = 1; i < inst->magnLen - 1; i++) { | 
|  | real[i] = winData[2 * i]; | 
|  | imag[i] = winData[2 * i + 1]; | 
|  | // magnitude spectrum | 
|  | fTmp = real[i] * real[i]; | 
|  | fTmp += imag[i] * imag[i]; | 
|  | signalEnergy += fTmp; | 
|  | magn[i] = ((float)sqrt(fTmp)) + 1.0f; | 
|  | sumMagn += magn[i]; | 
|  | if (inst->blockInd < END_STARTUP_SHORT) { | 
|  | inst->initMagnEst[i] += magn[i]; | 
|  | if (i >= kStartBand) { | 
|  | tmpFloat2 = log((float)i); | 
|  | sum_log_i += tmpFloat2; | 
|  | sum_log_i_square += tmpFloat2 * tmpFloat2; | 
|  | tmpFloat1 = log(magn[i]); | 
|  | sum_log_magn += tmpFloat1; | 
|  | sum_log_i_log_magn += tmpFloat2 * tmpFloat1; | 
|  | } | 
|  | } | 
|  | } | 
|  | signalEnergy = signalEnergy / ((float)inst->magnLen); | 
|  | inst->signalEnergy = signalEnergy; | 
|  | inst->sumMagn = sumMagn; | 
|  |  | 
|  | //compute spectral flatness on input spectrum | 
|  | WebRtcNs_ComputeSpectralFlatness(inst, magn); | 
|  | // quantile noise estimate | 
|  | WebRtcNs_NoiseEstimation(inst, magn, noise); | 
|  | //compute simplified noise model during startup | 
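// During the first END_STARTUP_SHORT blocks the quantile estimate is still
// unreliable, so a parametric model is fitted in parallel: a white-noise
// level (average magnitude scaled by the overdrive) and a pink-noise model
// magn ~ num / f^exp, whose parameters come from a least-squares fit of
// log(magn[i]) against log(i) over bins i >= kStartBand. Further down, the
// quantile and parametric estimates are blended according to the block index.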
|  | if (inst->blockInd < END_STARTUP_SHORT) { | 
|  | // Estimate White noise | 
|  | inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive; | 
|  | // Estimate Pink noise parameters | 
|  | tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand)); | 
|  | tmpFloat1 -= (sum_log_i * sum_log_i); | 
|  | tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); | 
|  | tmpFloat3 = tmpFloat2 / tmpFloat1; | 
|  | // Constrain the estimated spectrum to be positive | 
|  | if (tmpFloat3 < 0.0f) { | 
|  | tmpFloat3 = 0.0f; | 
|  | } | 
|  | inst->pinkNoiseNumerator += tmpFloat3; | 
|  | tmpFloat2 = (sum_log_i * sum_log_magn); | 
|  | tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn; | 
|  | tmpFloat3 = tmpFloat2 / tmpFloat1; | 
// Constrain the pink noise exponent to the interval [0, 1].
|  | if (tmpFloat3 < 0.0f) { | 
|  | tmpFloat3 = 0.0f; | 
|  | } | 
|  | if (tmpFloat3 > 1.0f) { | 
|  | tmpFloat3 = 1.0f; | 
|  | } | 
|  | inst->pinkNoiseExp += tmpFloat3; | 
|  |  | 
|  | // Calculate frequency independent parts of parametric noise estimate. | 
|  | if (inst->pinkNoiseExp == 0.0f) { | 
|  | // Use white noise estimate | 
|  | parametric_noise = inst->whiteNoiseLevel; | 
|  | } else { | 
|  | // Use pink noise estimate | 
|  | parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1)); | 
|  | parametric_num *= (float)(inst->blockInd + 1); | 
|  | parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1); | 
|  | parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp); | 
|  | } | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | // Estimate the background noise using the white and pink noise parameters | 
|  | if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) { | 
|  | // Use pink noise estimate | 
|  | parametric_noise = parametric_num / pow((float)i, parametric_exp); | 
|  | } | 
|  | theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise); | 
|  | theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001); | 
|  | // Weight quantile noise with modeled noise | 
|  | noise[i] *= (inst->blockInd); | 
|  | tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd); | 
|  | noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1)); | 
|  | noise[i] /= END_STARTUP_SHORT; | 
|  | } | 
|  | } | 
|  | //compute average signal during END_STARTUP_LONG time: | 
|  | // used to normalize spectral difference measure | 
|  | if (inst->blockInd < END_STARTUP_LONG) { | 
|  | inst->featureData[5] *= inst->blockInd; | 
|  | inst->featureData[5] += signalEnergy; | 
|  | inst->featureData[5] /= (inst->blockInd + 1); | 
|  | } | 
|  |  | 
|  | #ifdef PROCESS_FLOW_0 | 
|  | if (inst->blockInd > END_STARTUP_LONG) { | 
|  | //option: average the quantile noise: for check with AEC2 | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i]; | 
|  | } | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
// Wiener filter with over-subtraction:
|  | theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001); | 
|  | } | 
|  | } | 
|  | #else | 
|  | //start processing at frames == converged+1 | 
|  | // | 
// STEP 1: compute prior and post snr based on quantile noise est
|  | // | 
|  |  | 
|  | // compute DD estimate of prior SNR: needed for new method | 
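// Decision-directed (DD) prior SNR, in the style of Ephraim-Malah estimators:
//   prior[i] = DD_PR_SNR * prevEstimate[i] + (1 - DD_PR_SNR) * post[i],
// where prevEstimate is the previous frame's SNR passed through the previous
// suppression gain (inst->smooth) and post[i] = max(magn/noise - 1, 0).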
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | // post snr | 
|  | snrLocPost[i] = (float)0.0; | 
|  | if (magn[i] > noise[i]) { | 
|  | snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; | 
|  | } | 
|  | // previous post snr | 
|  | // previous estimate: based on previous frame with gain filter | 
|  | previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001) | 
|  | * (inst->smooth[i]); | 
|  | // DD estimate is sum of two terms: current estimate and previous estimate | 
|  | // directed decision update of snrPrior | 
|  | snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) | 
|  | * snrLocPost[i]; | 
|  | // post and prior snr needed for step 2 | 
|  | } // end of loop over freqs | 
|  | #ifdef PROCESS_FLOW_1 | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | // gain filter | 
|  | tmpFloat1 = inst->overdrive + snrLocPrior[i]; | 
|  | tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1; | 
|  | theFilter[i] = (float)tmpFloat2; | 
|  | } // end of loop over freqs | 
|  | #endif | 
|  | // done with step 1: dd computation of prior and post snr | 
|  |  | 
|  | // | 
|  | //STEP 2: compute speech/noise likelihood | 
|  | // | 
|  | #ifdef PROCESS_FLOW_2 | 
|  | // compute difference of input spectrum with learned/estimated noise spectrum | 
|  | WebRtcNs_ComputeSpectralDifference(inst, magn); | 
|  | // compute histograms for parameter decisions (thresholds and weights for features) | 
|  | // parameters are extracted once every window time (=inst->modelUpdatePars[1]) | 
|  | if (updateParsFlag >= 1) { | 
|  | // counter update | 
|  | inst->modelUpdatePars[3]--; | 
|  | // update histogram | 
|  | if (inst->modelUpdatePars[3] > 0) { | 
|  | WebRtcNs_FeatureParameterExtraction(inst, 0); | 
|  | } | 
|  | // compute model parameters | 
|  | if (inst->modelUpdatePars[3] == 0) { | 
|  | WebRtcNs_FeatureParameterExtraction(inst, 1); | 
|  | inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; | 
|  | // if wish to update only once, set flag to zero | 
|  | if (updateParsFlag == 1) { | 
|  | inst->modelUpdatePars[0] = 0; | 
|  | } else { | 
|  | // update every window: | 
|  | // get normalization for spectral difference for next window estimate | 
|  | inst->featureData[6] = inst->featureData[6] | 
|  | / ((float)inst->modelUpdatePars[1]); | 
|  | inst->featureData[5] = (float)0.5 * (inst->featureData[6] | 
|  | + inst->featureData[5]); | 
|  | inst->featureData[6] = (float)0.0; | 
|  | } | 
|  | } | 
|  | } | 
|  | // compute speech/noise probability | 
|  | WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost); | 
|  | // time-avg parameter for noise update | 
|  | gammaNoiseTmp = NOISE_UPDATE; | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | probSpeech = probSpeechFinal[i]; | 
|  | probNonSpeech = (float)1.0 - probSpeech; | 
|  | // temporary noise update: | 
|  | // use it for speech frames if update value is less than previous | 
|  | noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) | 
|  | * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); | 
|  | // | 
|  | // time-constant based on speech/noise state | 
|  | gammaNoiseOld = gammaNoiseTmp; | 
|  | gammaNoiseTmp = NOISE_UPDATE; | 
|  | // increase gamma (i.e., less noise update) for frame likely to be speech | 
|  | if (probSpeech > PROB_RANGE) { | 
|  | gammaNoiseTmp = SPEECH_UPDATE; | 
|  | } | 
|  | // conservative noise update | 
|  | if (probSpeech < PROB_RANGE) { | 
|  | inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]); | 
|  | } | 
|  | // noise update | 
|  | if (gammaNoiseTmp == gammaNoiseOld) { | 
|  | noise[i] = noiseUpdateTmp; | 
|  | } else { | 
|  | noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) | 
|  | * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); | 
|  | // allow for noise update downwards: | 
|  | //  if noise update decreases the noise, it is safe, so allow it to happen | 
|  | if (noiseUpdateTmp < noise[i]) { | 
|  | noise[i] = noiseUpdateTmp; | 
|  | } | 
|  | } | 
|  | } // end of freq loop | 
|  | // done with step 2: noise update | 
|  |  | 
|  | // | 
|  | // STEP 3: compute dd update of prior snr and post snr based on new noise estimate | 
|  | // | 
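// The suppression gain per bin is a Wiener-like filter with over-subtraction,
//   theFilter[i] = prior / (overdrive + prior),
// recomputed here with the prior SNR based on the just-updated noise estimate.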
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | // post and prior snr | 
|  | currentEstimateStsa = (float)0.0; | 
|  | if (magn[i] > noise[i]) { | 
|  | currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; | 
|  | } | 
// DD estimate is the sum of two terms: current estimate and previous estimate
|  | // directed decision update of snrPrior | 
|  | snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) | 
|  | * currentEstimateStsa; | 
|  | // gain filter | 
|  | tmpFloat1 = inst->overdrive + snrPrior; | 
|  | tmpFloat2 = (float)snrPrior / tmpFloat1; | 
|  | theFilter[i] = (float)tmpFloat2; | 
|  | } // end of loop over freqs | 
|  | // done with step3 | 
|  | #endif | 
|  | #endif | 
|  |  | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | // flooring bottom | 
|  | if (theFilter[i] < inst->denoiseBound) { | 
|  | theFilter[i] = inst->denoiseBound; | 
|  | } | 
|  | // flooring top | 
|  | if (theFilter[i] > (float)1.0) { | 
|  | theFilter[i] = 1.0; | 
|  | } | 
|  | if (inst->blockInd < END_STARTUP_SHORT) { | 
|  | // flooring bottom | 
|  | if (theFilterTmp[i] < inst->denoiseBound) { | 
|  | theFilterTmp[i] = inst->denoiseBound; | 
|  | } | 
|  | // flooring top | 
|  | if (theFilterTmp[i] > (float)1.0) { | 
|  | theFilterTmp[i] = 1.0; | 
|  | } | 
|  | // Weight the two suppression filters | 
|  | theFilter[i] *= (inst->blockInd); | 
|  | theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd); | 
|  | theFilter[i] += theFilterTmp[i]; | 
|  | theFilter[i] /= (END_STARTUP_SHORT); | 
|  | } | 
|  | // smoothing | 
|  | #ifdef PROCESS_FLOW_0 | 
|  | inst->smooth[i] *= SMOOTH; // value set to 0.7 in define.h file | 
|  | inst->smooth[i] += ((float)1.0 - SMOOTH) * theFilter[i]; | 
|  | #else | 
|  | inst->smooth[i] = theFilter[i]; | 
|  | #endif | 
|  | real[i] *= inst->smooth[i]; | 
|  | imag[i] *= inst->smooth[i]; | 
|  | } | 
|  | // keep track of noise and magn spectrum for next frame | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | inst->noisePrev[i] = noise[i]; | 
|  | inst->magnPrev[i] = magn[i]; | 
|  | } | 
|  | // back to time domain | 
|  | winData[0] = real[0]; | 
|  | winData[1] = real[inst->magnLen - 1]; | 
|  | for (i = 1; i < inst->magnLen - 1; i++) { | 
|  | winData[2 * i] = real[i]; | 
|  | winData[2 * i + 1] = imag[i]; | 
|  | } | 
|  | WebRtc_rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft); | 
|  |  | 
|  | for (i = 0; i < inst->anaLen; i++) { | 
|  | real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling | 
|  | } | 
|  |  | 
|  | //scale factor: only do it after END_STARTUP_LONG time | 
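// Gain-map scaling: compare the energy of the filtered block with that of the
// windowed input (gain = sqrt(energy2 / energy1)). Blocks that keep much of
// their energy (speech-like) are boosted slightly, capped so gain * factor
// stays at or below 1, while heavily suppressed pause-like blocks are
// attenuated a bit further; in the PROCESS_FLOW_2 path the two factors are
// blended with the prior speech probability.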
|  | factor = (float)1.0; | 
|  | if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) { | 
|  | factor1 = (float)1.0; | 
|  | factor2 = (float)1.0; | 
|  |  | 
|  | energy2 = 0.0; | 
|  | for (i = 0; i < inst->anaLen; i++) { | 
|  | energy2 += (float)real[i] * (float)real[i]; | 
|  | } | 
|  | gain = (float)sqrt(energy2 / (energy1 + (float)1.0)); | 
|  |  | 
|  | #ifdef PROCESS_FLOW_2 | 
|  | // scaling for new version | 
|  | if (gain > B_LIM) { | 
|  | factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM); | 
|  | if (gain * factor1 > (float)1.0) { | 
|  | factor1 = (float)1.0 / gain; | 
|  | } | 
|  | } | 
|  | if (gain < B_LIM) { | 
|  | //don't reduce scale too much for pause regions: | 
|  | // attenuation here should be controlled by flooring | 
|  | if (gain <= inst->denoiseBound) { | 
|  | gain = inst->denoiseBound; | 
|  | } | 
|  | factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain); | 
|  | } | 
|  | //combine both scales with speech/noise prob: | 
|  | // note prior (priorSpeechProb) is not frequency dependent | 
|  | factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb) | 
|  | * factor2; | 
|  | #else | 
|  | if (gain > B_LIM) { | 
|  | factor = (float)1.0 + (float)1.3 * (gain - B_LIM); | 
|  | } else { | 
|  | factor = (float)1.0 + (float)2.0 * (gain - B_LIM); | 
|  | } | 
|  | if (gain * factor > (float)1.0) { | 
|  | factor = (float)1.0 / gain; | 
|  | } | 
|  | #endif | 
|  | } // out of inst->gainmap==1 | 
|  |  | 
|  | // synthesis | 
|  | for (i = 0; i < inst->anaLen; i++) { | 
|  | inst->syntBuf[i] += factor * inst->window[i] * (float)real[i]; | 
|  | } | 
|  | // read out fully processed segment | 
|  | for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { | 
|  | fout[i - inst->windShift] = inst->syntBuf[i]; | 
|  | } | 
|  | // update synthesis buffer | 
|  | memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, | 
|  | sizeof(float) * (inst->anaLen - inst->blockLen)); | 
|  | memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, | 
|  | sizeof(float) * inst->blockLen); | 
|  |  | 
|  | // out buffer | 
|  | inst->outLen = inst->blockLen - inst->blockLen10ms; | 
|  | if (inst->blockLen > inst->blockLen10ms) { | 
|  | for (i = 0; i < inst->outLen; i++) { | 
|  | inst->outBuf[i] = fout[i + inst->blockLen10ms]; | 
|  | } | 
|  | } | 
|  | } // end of if out.len==0 | 
|  | else { | 
|  | for (i = 0; i < inst->blockLen10ms; i++) { | 
|  | fout[i] = inst->outBuf[i]; | 
|  | } | 
|  | memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms, | 
|  | sizeof(float) * (inst->outLen - inst->blockLen10ms)); | 
|  | memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0, | 
|  | sizeof(float) * inst->blockLen10ms); | 
|  | inst->outLen -= inst->blockLen10ms; | 
|  | } | 
|  |  | 
|  | // convert to short | 
|  | for (i = 0; i < inst->blockLen10ms; i++) { | 
|  | dTmp = fout[i]; | 
|  | if (dTmp < WEBRTC_SPL_WORD16_MIN) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MIN; | 
|  | } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MAX; | 
|  | } | 
|  | outFrame[i] = (short)dTmp; | 
|  | } | 
|  |  | 
|  | // for time-domain gain of HB | 
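// No separate spectral analysis is done for the high band. Instead a single
// time-domain gain is derived from the low band: the speech probability and
// the suppression gain are averaged over the upper low-band bins and blended,
// leaning more on the averaged filter gain when speech is likely.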
|  | if (flagHB == 1) { | 
|  | for (i = 0; i < inst->magnLen; i++) { | 
|  | inst->speechProbHB[i] = probSpeechFinal[i]; | 
|  | } | 
// average speech prob from low band
// average over the upper quarter (about 6->8 kHz) of the low-band spectrum
|  | avgProbSpeechHB = 0.0; | 
|  | for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) { | 
|  | avgProbSpeechHB += inst->speechProbHB[i]; | 
|  | } | 
|  | avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); | 
// average filter gain from low band
// average over the upper quarter (about 6->8 kHz) of the low-band spectrum
|  | avgFilterGainHB = 0.0; | 
|  | for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) { | 
|  | avgFilterGainHB += inst->smooth[i]; | 
|  | } | 
|  | avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); | 
|  | avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0; | 
|  | // gain based on speech prob: | 
|  | gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); | 
|  | //combine gain with low band gain | 
|  | gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB; | 
|  | if (avgProbSpeechHB >= (float)0.5) { | 
|  | gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB; | 
|  | } | 
|  | gainTimeDomainHB = gainTimeDomainHB * decayBweHB; | 
|  | //make sure gain is within flooring range | 
|  | // flooring bottom | 
|  | if (gainTimeDomainHB < inst->denoiseBound) { | 
|  | gainTimeDomainHB = inst->denoiseBound; | 
|  | } | 
|  | // flooring top | 
|  | if (gainTimeDomainHB > (float)1.0) { | 
|  | gainTimeDomainHB = 1.0; | 
|  | } | 
|  | //apply gain | 
|  | for (i = 0; i < inst->blockLen10ms; i++) { | 
|  | dTmp = gainTimeDomainHB * inst->dataBufHB[i]; | 
|  | if (dTmp < WEBRTC_SPL_WORD16_MIN) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MIN; | 
|  | } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { | 
|  | dTmp = WEBRTC_SPL_WORD16_MAX; | 
|  | } | 
|  | outFrameHB[i] = (short)dTmp; | 
|  | } | 
|  | } // end of H band gain computation | 
|  | // | 
|  |  | 
|  | return 0; | 
|  | } |