MIPS optimizations for NS audio processing module
R=andrew@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/4139006
Patch from Ljubomir Papuga <lpapuga@mips.com>.
git-svn-id: http://webrtc.googlecode.com/svn/trunk@5393 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index 6512f03..1f3f705 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -101,6 +101,17 @@
'ns/nsx_core.h',
'ns/nsx_defines.h',
],
+ 'conditions': [
+ ['target_arch=="mipsel"', {
+ 'sources': [
+ 'ns/nsx_core_mips.c',
+ ],
+ }, {
+ 'sources': [
+ 'ns/nsx_core_c.c',
+ ],
+ }],
+ ],
}, {
'defines': ['WEBRTC_NS_FLOAT'],
'sources': [
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c
index c722957..e627a2e 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core.c
@@ -70,11 +70,6 @@
// Skip first frequency bins during estimation. (0 <= value < 64)
static const int kStartBand = 5;
-static const int16_t kIndicatorTable[17] = {
- 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
- 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
-};
-
// hybrib Hanning & flat window
static const int16_t kBlocks80w128x[128] = {
0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266,
@@ -481,7 +476,7 @@
}
// Denormalize the real-valued signal |in|, the output from inverse FFT.
-static __inline void Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
+static void DenormalizeC(NsxInst_t* inst, int16_t* in, int factor) {
int i = 0;
int32_t tmp32 = 0;
for (i = 0; i < inst->anaLen; i += 1) {
@@ -546,9 +541,9 @@
}
// Normalize the real-valued signal |in|, the input to forward FFT.
-static __inline void NormalizeRealBuffer(NsxInst_t* inst,
- const int16_t* in,
- int16_t* out) {
+static void NormalizeRealBufferC(NsxInst_t* inst,
+ const int16_t* in,
+ int16_t* out) {
int i = 0;
for (i = 0; i < inst->anaLen; ++i) {
out[i] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
@@ -560,6 +555,8 @@
PrepareSpectrum WebRtcNsx_PrepareSpectrum;
SynthesisUpdate WebRtcNsx_SynthesisUpdate;
AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+Denormalize WebRtcNsx_Denormalize;
+NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
// Initialize function pointers for ARM Neon platform.
@@ -571,6 +568,19 @@
}
#endif
+#if defined(MIPS32_LE)
+// Initialize function pointers for MIPS platform.
+static void WebRtcNsx_InitMips(void) {
+ WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips;
+ WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips;
+ WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips;
+ WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips;
+#if defined(MIPS_DSP_R1_LE)
+ WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips;
+#endif
+}
+#endif
+
void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
int16_t pink_noise_exp_avg,
int32_t pink_noise_num_avg,
@@ -758,6 +768,8 @@
WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
+ WebRtcNsx_Denormalize = DenormalizeC;
+ WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC;
#ifdef WEBRTC_DETECT_ARM_NEON
uint64_t features = WebRtc_GetCPUFeaturesARM();
@@ -768,6 +780,10 @@
WebRtcNsx_InitNeon();
#endif
+#if defined(MIPS32_LE)
+ WebRtcNsx_InitMips();
+#endif
+
inst->initFlag = 1;
return 0;
@@ -1169,239 +1185,6 @@
}
}
-// Compute speech/noise probability
-// speech/noise probability is returned in: probSpeechFinal
-//snrLocPrior is the prior SNR for each frequency (in Q11)
-//snrLocPost is the post SNR for each frequency (in Q11)
-void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, uint16_t* nonSpeechProbFinal,
- uint32_t* priorLocSnr, uint32_t* postLocSnr) {
- uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
-
- int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
- int32_t frac32, logTmp;
- int32_t logLrtTimeAvgKsumFX;
-
- int16_t indPriorFX16;
- int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
-
- int i, normTmp, normTmp2, nShifts;
-
- // compute feature based on average LR factor
- // this is the average over all frequencies of the smooth log LRT
- logLrtTimeAvgKsumFX = 0;
- for (i = 0; i < inst->magnLen; i++) {
- besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
- normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
- num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp)
- if (normTmp > 10) {
- den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp)
- } else {
- den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
- }
- if (den > 0) {
- besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11
- } else {
- besselTmpFX32 -= num; // Q11
- }
-
- // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) - inst->logLrtTimeAvg[i]);
- // Here, LRT_TAVG = 0.5
- zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
- frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
- tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
- tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
- tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
- frac32 = tmp32 + 37;
- // tmp32 = log2(priorLocSnr[i])
- tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
- logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); // log2(priorLocSnr[i])*log(2)
- tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); // Q12
- inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
-
- logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
- }
- inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, inst->stages + 10); // 5 = BIN_SIZE_LRT / 2
- // done with computation of LR factor
-
- //
- //compute the indicator functions
- //
-
- // average LRT feature
- // FLOAT code
- // indicator0 = 0.5 * (tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
- tmpIndFX = 16384; // Q14(1.0)
- tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
- nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
- //use larger width in tanh map for pause regions
- if (tmp32no1 < 0) {
- tmpIndFX = 0;
- tmp32no1 = -tmp32no1;
- //widthPrior = widthPrior * 2.0;
- nShifts++;
- }
- tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
- // compute indicator function: sigmoid map
- tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
- if ((tableIndex < 16) && (tableIndex >= 0)) {
- tmp16no2 = kIndicatorTable[tableIndex];
- tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
- frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
- tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
- if (tmpIndFX == 0) {
- tmpIndFX = 8192 - tmp16no2; // Q14
- } else {
- tmpIndFX = 8192 + tmp16no2; // Q14
- }
- }
- indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
-
- //spectral flatness feature
- if (inst->weightSpecFlat) {
- tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
- tmpIndFX = 16384; // Q14(1.0)
- //use larger width in tanh map for pause regions
- tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
- nShifts = 4;
- if (inst->thresholdSpecFlat < tmpU32no1) {
- tmpIndFX = 0;
- tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
- //widthPrior = widthPrior * 2.0;
- nShifts++;
- }
- tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
- nShifts), 25); //Q14
- tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14
- // compute indicator function: sigmoid map
- // FLOAT code
- // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 1.0);
- tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
- if (tableIndex < 16) {
- tmp16no2 = kIndicatorTable[tableIndex];
- tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
- frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
- tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
- if (tmpIndFX) {
- tmpIndFX = 8192 + tmp16no2; // Q14
- } else {
- tmpIndFX = 8192 - tmp16no2; // Q14
- }
- }
- indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
- }
-
- //for template spectral-difference
- if (inst->weightSpecDiff) {
- tmpU32no1 = 0;
- if (inst->featureSpecDiff) {
- normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
- WebRtcSpl_NormU32(inst->featureSpecDiff));
- tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); // Q(normTmp-2*stages)
- tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, 20 - inst->stages
- - normTmp);
- if (tmpU32no2 > 0) {
- // Q(20 - inst->stages)
- tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
- } else {
- tmpU32no1 = (uint32_t)(0x7fffffff);
- }
- }
- tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, 17), 25);
- tmpU32no2 = tmpU32no1 - tmpU32no3;
- nShifts = 1;
- tmpIndFX = 16384; // Q14(1.0)
- //use larger width in tanh map for pause regions
- if (tmpU32no2 & 0x80000000) {
- tmpIndFX = 0;
- tmpU32no2 = tmpU32no3 - tmpU32no1;
- //widthPrior = widthPrior * 2.0;
- nShifts--;
- }
- tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
- // compute indicator function: sigmoid map
- /* FLOAT code
- indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
- */
- tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
- if (tableIndex < 16) {
- tmp16no2 = kIndicatorTable[tableIndex];
- tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
- frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
- tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
- tmp16no1, frac, 14);
- if (tmpIndFX) {
- tmpIndFX = 8192 + tmp16no2;
- } else {
- tmpIndFX = 8192 - tmp16no2;
- }
- }
- indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
- }
-
- //combine the indicator function with the feature weights
- // FLOAT code
- // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 * indicator2);
- indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
- // done with computing indicator function
-
- //compute the prior probability
- // FLOAT code
- // inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb);
- tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
- inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
- PRIOR_UPDATE_Q14, tmp16, 14); // Q14
-
- //final speech probability: combine prior model with LR factor:
-
- memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
-
- if (inst->priorNonSpeechProb > 0) {
- for (i = 0; i < inst->magnLen; i++) {
- // FLOAT code
- // invLrt = exp(inst->logLrtTimeAvg[i]);
- // invLrt = inst->priorSpeechProb * invLrt;
- // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / (1.0 - inst->priorSpeechProb + invLrt);
- // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
- // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / (inst->priorNonSpeechProb + invLrt);
- if (inst->logLrtTimeAvgW32[i] < 65300) {
- tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(inst->logLrtTimeAvgW32[i], 23637),
- 14); // Q12
- intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
- if (intPart < -8) {
- intPart = -8;
- }
- frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
-
- // Quadratic approximation of 2^frac
- tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
- tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
- invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
- + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
-
- normTmp = WebRtcSpl_NormW32(invLrtFX);
- normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
- if (normTmp + normTmp2 >= 7) {
- if (normTmp + normTmp2 < 15) {
- invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
- // Q(normTmp+normTmp2-7)
- tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb));
- // Q(normTmp+normTmp2+7)
- invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); // Q14
- } else {
- tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q22
- invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
- }
-
- tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb, 8); // Q22
-
- nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1,
- (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8
- }
- }
- }
- }
-}
-
// Transform input (speechFrame) to frequency domain magnitude (magnU16)
void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU16) {
@@ -1461,7 +1244,7 @@
right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
// create realImag as winData interleaved with zeros (= imag. part), normalize it
- NormalizeRealBuffer(inst, winData, realImag);
+ WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag);
// FFT output will be in winData[].
WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
@@ -1693,7 +1476,7 @@
// Inverse FFT output will be in rfft_out[].
outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
- Denormalize(inst, rfft_out, outCIFFT);
+ WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);
//scale factor: only do it after END_STARTUP_LONG time
gainFactor = 8192; // 8192 = Q13(1.0)
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.h b/webrtc/modules/audio_processing/ns/nsx_core.h
index 1ad369f..5b3c5e7 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.h
+++ b/webrtc/modules/audio_processing/ns/nsx_core.h
@@ -201,6 +201,23 @@
int16_t* new_speech);
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+typedef void (*Denormalize) (NsxInst_t* inst, int16_t* in, int factor);
+extern Denormalize WebRtcNsx_Denormalize;
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+typedef void (*NormalizeRealBuffer) (NsxInst_t* inst,
+ const int16_t* in,
+ int16_t* out);
+extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
+
+// Compute speech/noise probability.
+// Intended to be private.
+void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
+ uint16_t* nonSpeechProbFinal,
+ uint32_t* priorLocSnr,
+ uint32_t* postLocSnr);
+
#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
@@ -218,6 +235,26 @@
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
#endif
+#if defined(MIPS32_LE)
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file nsx_core.c, while those for MIPS platforms
+// are declared below and defined in file nsx_core_mips.c.
+void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
+ int16_t* out_frame,
+ int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
+ int16_t* out,
+ int16_t* new_speech);
+void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buff);
+void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
+ const int16_t* in,
+ int16_t* out);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor);
+#endif
+
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/webrtc/modules/audio_processing/ns/nsx_core_c.c b/webrtc/modules/audio_processing/ns/nsx_core_c.c
new file mode 100644
index 0000000..452b96e
--- /dev/null
+++ b/webrtc/modules/audio_processing/ns/nsx_core_c.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+static const int16_t kIndicatorTable[17] = {
+ 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+ 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability
+// speech/noise probability is returned in: probSpeechFinal
+//snrLocPrior is the prior SNR for each frequency (in Q11)
+//snrLocPost is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
+ uint16_t* nonSpeechProbFinal,
+ uint32_t* priorLocSnr,
+ uint32_t* postLocSnr) {
+
+ uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
+ int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
+ int32_t frac32, logTmp;
+ int32_t logLrtTimeAvgKsumFX;
+ int16_t indPriorFX16;
+ int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
+ int i, normTmp, normTmp2, nShifts;
+
+ // compute feature based on average LR factor
+ // this is the average over all frequencies of the smooth log LRT
+ logLrtTimeAvgKsumFX = 0;
+ for (i = 0; i < inst->magnLen; i++) {
+ besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
+ normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
+ num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp)
+ if (normTmp > 10) {
+ den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp)
+ } else {
+ den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp)
+ }
+ if (den > 0) {
+ besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11
+ } else {
+ besselTmpFX32 -= num; // Q11
+ }
+
+ // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
+ // - inst->logLrtTimeAvg[i]);
+ // Here, LRT_TAVG = 0.5
+ zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
+ frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
+ tmp32 = WEBRTC_SPL_MUL(frac32, frac32);
+ tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19);
+ tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12);
+ frac32 = tmp32 + 37;
+ // tmp32 = log2(priorLocSnr[i])
+ tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
+ logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8);
+ // log2(priorLocSnr[i])*log(2)
+ tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1);
+ // Q12
+ inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
+
+ logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
+ }
+ inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
+ inst->stages + 10);
+ // 5 = BIN_SIZE_LRT / 2
+ // done with computation of LR factor
+
+ //
+ //compute the indicator functions
+ //
+
+ // average LRT feature
+ // FLOAT code
+ // indicator0 = 0.5 * (tanh(widthPrior *
+ // (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+ tmpIndFX = 16384; // Q14(1.0)
+ tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+ nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+ //use larger width in tanh map for pause regions
+ if (tmp32no1 < 0) {
+ tmpIndFX = 0;
+ tmp32no1 = -tmp32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+ // compute indicator function: sigmoid map
+ tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
+ if ((tableIndex < 16) && (tableIndex >= 0)) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
+ if (tmpIndFX == 0) {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ }
+ }
+ indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
+
+ //spectral flatness feature
+ if (inst->weightSpecFlat) {
+ tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+ nShifts = 4;
+ if (inst->thresholdSpecFlat < tmpU32no1) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
+ nShifts), 25);
+ //Q14
+ tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts),
+ 25); //Q14
+ // compute indicator function: sigmoid map
+ // FLOAT code
+ // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+ // (threshPrior1 - tmpFloat1)) + 1.0);
+ tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ }
+ }
+ indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
+ }
+
+ //for template spectral-difference
+ if (inst->weightSpecDiff) {
+ tmpU32no1 = 0;
+ if (inst->featureSpecDiff) {
+ normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+ WebRtcSpl_NormU32(inst->featureSpecDiff));
+ tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp);
+ // Q(normTmp-2*stages)
+ tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
+ 20 - inst->stages - normTmp);
+ if (tmpU32no2 > 0) {
+ // Q(20 - inst->stages)
+ tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
+ } else {
+ tmpU32no1 = (uint32_t)(0x7fffffff);
+ }
+ }
+ tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff,
+ 17),
+ 25);
+ tmpU32no2 = tmpU32no1 - tmpU32no3;
+ nShifts = 1;
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ if (tmpU32no2 & 0x80000000) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no3 - tmpU32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts--;
+ }
+ tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
+ // compute indicator function: sigmoid map
+ /* FLOAT code
+ indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+ */
+ tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ tmp16no1, frac, 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2;
+ } else {
+ tmpIndFX = 8192 - tmp16no2;
+ }
+ }
+ indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
+ }
+
+ //combine the indicator function with the feature weights
+ // FLOAT code
+ // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+ // indicator1 + weightIndPrior2 * indicator2);
+ indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+ // done with computing indicator function
+
+ //compute the prior probability
+ // FLOAT code
+ // inst->priorNonSpeechProb += PRIOR_UPDATE *
+ // (indPriorNonSpeech - inst->priorNonSpeechProb);
+ tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+ inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+ PRIOR_UPDATE_Q14, tmp16, 14); // Q14
+
+ //final speech probability: combine prior model with LR factor:
+
+ memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+ if (inst->priorNonSpeechProb > 0) {
+ for (i = 0; i < inst->magnLen; i++) {
+ // FLOAT code
+ // invLrt = exp(inst->logLrtTimeAvg[i]);
+ // invLrt = inst->priorSpeechProb * invLrt;
+ // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
+ // (1.0 - inst->priorSpeechProb + invLrt);
+ // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
+ // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
+ // (inst->priorNonSpeechProb + invLrt);
+ if (inst->logLrtTimeAvgW32[i] < 65300) {
+ tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(
+ inst->logLrtTimeAvgW32[i], 23637),
+ 14); // Q12
+ intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12);
+ if (intPart < -8) {
+ intPart = -8;
+ }
+ frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
+
+ // Quadratic approximation of 2^frac
+ tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12
+ tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12
+ invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart)
+ + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
+
+ normTmp = WebRtcSpl_NormW32(invLrtFX);
+ normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
+ if (normTmp + normTmp2 >= 7) {
+ if (normTmp + normTmp2 < 15) {
+ invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp);
+ // Q(normTmp+normTmp2-7)
+ tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
+ (16384 - inst->priorNonSpeechProb));
+ // Q(normTmp+normTmp2+7)
+ invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
+ // Q14
+ } else {
+ tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX,
+ (16384 - inst->priorNonSpeechProb));
+ // Q22
+ invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14
+ }
+
+ tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb,
+ 8); // Q22
+
+ nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1,
+ (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8
+ }
+ }
+ }
+ }
+}
+
diff --git a/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/webrtc/modules/audio_processing/ns/nsx_core_mips.c
new file mode 100644
index 0000000..ccb0c37
--- /dev/null
+++ b/webrtc/modules/audio_processing/ns/nsx_core_mips.c
@@ -0,0 +1,1008 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+static const int16_t kIndicatorTable[17] = {
+ 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+ 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability
+// speech/noise probability is returned in: probSpeechFinal
+//snrLocPrior is the prior SNR for each frequency (in Q11)
+//snrLocPost is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst,
+ uint16_t* nonSpeechProbFinal,
+ uint32_t* priorLocSnr,
+ uint32_t* postLocSnr) {
+
+ uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+ int32_t indPriorFX, tmp32no1;
+ int32_t logLrtTimeAvgKsumFX;
+ int16_t indPriorFX16;
+ int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
+ int i, normTmp, nShifts;
+
+ int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
+ int32_t const_max = 0x7fffffff;
+ int32_t const_neg43 = -43;
+ int32_t const_5412 = 5412;
+ int32_t const_11rsh12 = (11 << 12);
+ int32_t const_178 = 178;
+
+
+ // compute feature based on average LR factor
+ // this is the average over all frequencies of the smooth log LRT
+ logLrtTimeAvgKsumFX = 0;
+ for (i = 0; i < inst->magnLen; i++) {
+ r0 = postLocSnr[i]; // Q11
+ r1 = priorLocSnr[i];
+ r2 = inst->logLrtTimeAvgW32[i];
+
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "clz %[r3], %[r0] \n\t"
+ "clz %[r5], %[r1] \n\t"
+ "slti %[r4], %[r3], 32 \n\t"
+ "slti %[r6], %[r5], 32 \n\t"
+ "movz %[r3], $0, %[r4] \n\t"
+ "movz %[r5], $0, %[r6] \n\t"
+ "slti %[r4], %[r3], 11 \n\t"
+ "addiu %[r6], %[r3], -11 \n\t"
+ "neg %[r7], %[r6] \n\t"
+ "sllv %[r6], %[r1], %[r6] \n\t"
+ "srav %[r7], %[r1], %[r7] \n\t"
+ "movn %[r6], %[r7], %[r4] \n\t"
+ "sllv %[r1], %[r1], %[r5] \n\t"
+ "and %[r1], %[r1], %[const_max] \n\t"
+ "sra %[r1], %[r1], 19 \n\t"
+ "mul %[r7], %[r1], %[r1] \n\t"
+ "sllv %[r3], %[r0], %[r3] \n\t"
+ "divu %[r8], %[r3], %[r6] \n\t"
+ "slti %[r6], %[r6], 1 \n\t"
+ "mul %[r7], %[r7], %[const_neg43] \n\t"
+ "sra %[r7], %[r7], 19 \n\t"
+ "movz %[r3], %[r8], %[r6] \n\t"
+ "subu %[r0], %[r0], %[r3] \n\t"
+ "mul %[r1], %[r1], %[const_5412] \n\t"
+ "sra %[r1], %[r1], 12 \n\t"
+ "addu %[r7], %[r7], %[r1] \n\t"
+ "addiu %[r1], %[r7], 37 \n\t"
+ "addiu %[r5], %[r5], -31 \n\t"
+ "neg %[r5], %[r5] \n\t"
+ "sll %[r5], %[r5], 12 \n\t"
+ "addu %[r5], %[r5], %[r1] \n\t"
+ "subu %[r7], %[r5], %[const_11rsh12] \n\t"
+ "mul %[r7], %[r7], %[const_178] \n\t"
+ "sra %[r7], %[r7], 8 \n\t"
+ "addu %[r7], %[r7], %[r2] \n\t"
+ "sra %[r7], %[r7], 1 \n\t"
+ "subu %[r2], %[r2], %[r7] \n\t"
+ "addu %[r2], %[r2], %[r0] \n\t"
+ ".set pop \n\t"
+ : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
+ : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
+ [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
+ [const_178] "r" (const_178)
+ : "hi", "lo"
+ );
+ inst->logLrtTimeAvgW32[i] = r2;
+ logLrtTimeAvgKsumFX += r2;
+ }
+
+ inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5,
+ inst->stages + 10);
+ // 5 = BIN_SIZE_LRT / 2
+ // done with computation of LR factor
+
+ //
+ // compute the indicator functions
+ //
+
+ // average LRT feature
+ // FLOAT code
+ // indicator0 = 0.5 * (tanh(widthPrior *
+ // (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+ tmpIndFX = 16384; // Q14(1.0)
+ tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
+ nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
+ //use larger width in tanh map for pause regions
+ if (tmp32no1 < 0) {
+ tmpIndFX = 0;
+ tmp32no1 = -tmp32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
+ // compute indicator function: sigmoid map
+ tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14);
+ if ((tableIndex < 16) && (tableIndex >= 0)) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
+ if (tmpIndFX == 0) {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ }
+ }
+ indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14
+
+ //spectral flatness feature
+ if (inst->weightSpecFlat) {
+ tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
+ nShifts = 4;
+ if (inst->thresholdSpecFlat < tmpU32no1) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
+ //widthPrior = widthPrior * 2.0;
+ nShifts++;
+ }
+ tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2,
+ nShifts), 25);
+ //Q14
+ tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts),
+ 25); //Q14
+ // compute indicator function: sigmoid map
+ // FLOAT code
+ // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
+ // (threshPrior1 - tmpFloat1)) + 1.0);
+ tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2; // Q14
+ } else {
+ tmpIndFX = 8192 - tmp16no2; // Q14
+ }
+ }
+ indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14
+ }
+
+ //for template spectral-difference
+ if (inst->weightSpecDiff) {
+ tmpU32no1 = 0;
+ if (inst->featureSpecDiff) {
+ normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
+ WebRtcSpl_NormU32(inst->featureSpecDiff));
+ tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp);
+ // Q(normTmp-2*stages)
+ tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy,
+ 20 - inst->stages - normTmp);
+ if (tmpU32no2 > 0) {
+ // Q(20 - inst->stages)
+ tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2);
+ } else {
+ tmpU32no1 = (uint32_t)(0x7fffffff);
+ }
+ }
+ tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff,
+ 17),
+ 25);
+ tmpU32no2 = tmpU32no1 - tmpU32no3;
+ nShifts = 1;
+ tmpIndFX = 16384; // Q14(1.0)
+ //use larger width in tanh map for pause regions
+ if (tmpU32no2 & 0x80000000) {
+ tmpIndFX = 0;
+ tmpU32no2 = tmpU32no3 - tmpU32no1;
+ //widthPrior = widthPrior * 2.0;
+ nShifts--;
+ }
+ tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts);
+ // compute indicator function: sigmoid map
+ /* FLOAT code
+ indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
+ */
+ tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14);
+ if (tableIndex < 16) {
+ tmp16no2 = kIndicatorTable[tableIndex];
+ tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
+ frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
+ tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ tmp16no1, frac, 14);
+ if (tmpIndFX) {
+ tmpIndFX = 8192 + tmp16no2;
+ } else {
+ tmpIndFX = 8192 - tmp16no2;
+ }
+ }
+ indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14
+ }
+
+ //combine the indicator function with the feature weights
+ // FLOAT code
+ // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
+ // indicator1 + weightIndPrior2 * indicator2);
+ indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
+ // done with computing indicator function
+
+ //compute the prior probability
+ // FLOAT code
+ // inst->priorNonSpeechProb += PRIOR_UPDATE *
+ // (indPriorNonSpeech - inst->priorNonSpeechProb);
+ tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
+ inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+ PRIOR_UPDATE_Q14, tmp16, 14); // Q14
+
+ //final speech probability: combine prior model with LR factor:
+
+ memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
+
+ if (inst->priorNonSpeechProb > 0) {
+ r0 = inst->priorNonSpeechProb;
+ r1 = 16384 - r0;
+ int32_t const_23637 = 23637;
+ int32_t const_44 = 44;
+ int32_t const_84 = 84;
+ int32_t const_1 = 1;
+ int32_t const_neg8 = -8;
+ for (i = 0; i < inst->magnLen; i++) {
+ r2 = inst->logLrtTimeAvgW32[i];
+ if (r2 < 65300) {
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "mul %[r2], %[r2], %[const_23637] \n\t"
+ "sll %[r6], %[r1], 16 \n\t"
+ "clz %[r7], %[r6] \n\t"
+ "clo %[r8], %[r6] \n\t"
+ "slt %[r9], %[r6], $0 \n\t"
+ "movn %[r7], %[r8], %[r9] \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "andi %[r3], %[r2], 0xfff \n\t"
+ "mul %[r4], %[r3], %[r3] \n\t"
+ "mul %[r3], %[r3], %[const_84] \n\t"
+ "sra %[r2], %[r2], 12 \n\t"
+ "slt %[r5], %[r2], %[const_neg8] \n\t"
+ "movn %[r2], %[const_neg8], %[r5] \n\t"
+ "mul %[r4], %[r4], %[const_44] \n\t"
+ "sra %[r3], %[r3], 7 \n\t"
+ "addiu %[r7], %[r7], -1 \n\t"
+ "slti %[r9], %[r7], 31 \n\t"
+ "movz %[r7], $0, %[r9] \n\t"
+ "sra %[r4], %[r4], 19 \n\t"
+ "addu %[r4], %[r4], %[r3] \n\t"
+ "addiu %[r3], %[r2], 8 \n\t"
+ "addiu %[r2], %[r2], -4 \n\t"
+ "neg %[r5], %[r2] \n\t"
+ "sllv %[r6], %[r4], %[r2] \n\t"
+ "srav %[r5], %[r4], %[r5] \n\t"
+ "slt %[r2], %[r2], $0 \n\t"
+ "movn %[r6], %[r5], %[r2] \n\t"
+ "sllv %[r3], %[const_1], %[r3] \n\t"
+ "addu %[r2], %[r3], %[r6] \n\t"
+ "clz %[r4], %[r2] \n\t"
+ "clo %[r5], %[r2] \n\t"
+ "slt %[r8], %[r2], $0 \n\t"
+ "movn %[r4], %[r5], %[r8] \n\t"
+ "addiu %[r4], %[r4], -1 \n\t"
+ "slt %[r5], $0, %[r2] \n\t"
+ "or %[r5], %[r5], %[r7] \n\t"
+ "movz %[r4], $0, %[r5] \n\t"
+ "addiu %[r6], %[r7], -7 \n\t"
+ "addu %[r6], %[r6], %[r4] \n\t"
+ "bltz %[r6], 1f \n\t"
+ " nop \n\t"
+ "addiu %[r4], %[r6], -8 \n\t"
+ "neg %[r3], %[r4] \n\t"
+ "srav %[r5], %[r2], %[r3] \n\t"
+ "mul %[r5], %[r5], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r1] \n\t"
+ "slt %[r4], %[r4], $0 \n\t"
+ "srav %[r5], %[r5], %[r6] \n\t"
+ "sra %[r2], %[r2], 8 \n\t"
+ "movn %[r2], %[r5], %[r4] \n\t"
+ "sll %[r3], %[r0], 8 \n\t"
+ "addu %[r2], %[r0], %[r2] \n\t"
+ "divu %[r3], %[r3], %[r2] \n\t"
+ "1: \n\t"
+ ".set pop \n\t"
+ : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
+ [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
+ [r8] "=&r" (r8), [r9] "=&r" (r9)
+ : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
+ [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
+ [const_1] "r" (const_1), [const_44] "r" (const_44)
+ : "hi", "lo"
+ );
+ nonSpeechProbFinal[i] = r3;
+ }
+ }
+ }
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst,
+ int16_t* out,
+ int16_t* new_speech) {
+
+ int iters, after;
+ int anaLen = inst->anaLen;
+ int *window = (int*)inst->window;
+ int *anaBuf = (int*)inst->analysisBuffer;
+ int *outBuf = (int*)out;
+ int r0, r1, r2, r3, r4, r5, r6, r7;
+#if defined(MIPS_DSP_R1_LE)
+ int r8;
+#endif
+
+ // For lower band update analysis buffer.
+ WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer,
+ inst->analysisBuffer + inst->blockLen10ms,
+ inst->anaLen - inst->blockLen10ms);
+ WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer
+ + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms);
+
+ // Window data before FFT.
+#if defined(MIPS_DSP_R1_LE)
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sra %[iters], %[anaLen], 3 \n\t"
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lw %[r0], 0(%[window]) \n\t"
+ "lw %[r1], 0(%[anaBuf]) \n\t"
+ "lw %[r2], 4(%[window]) \n\t"
+ "lw %[r3], 4(%[anaBuf]) \n\t"
+ "lw %[r4], 8(%[window]) \n\t"
+ "lw %[r5], 8(%[anaBuf]) \n\t"
+ "lw %[r6], 12(%[window]) \n\t"
+ "lw %[r7], 12(%[anaBuf]) \n\t"
+ "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t"
+ "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t"
+ "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t"
+ "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t"
+ "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t"
+ "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t"
+ "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t"
+ "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t"
+#if defined(MIPS_DSP_R2_LE)
+ "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t"
+ "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t"
+ "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t"
+ "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t"
+ "sw %[r8], 0(%[outBuf]) \n\t"
+ "sw %[r1], 4(%[outBuf]) \n\t"
+ "sw %[r3], 8(%[outBuf]) \n\t"
+ "sw %[r5], 12(%[outBuf]) \n\t"
+#else
+ "shra_r.w %[r8], %[r8], 15 \n\t"
+ "shra_r.w %[r0], %[r0], 15 \n\t"
+ "shra_r.w %[r1], %[r1], 15 \n\t"
+ "shra_r.w %[r2], %[r2], 15 \n\t"
+ "shra_r.w %[r3], %[r3], 15 \n\t"
+ "shra_r.w %[r4], %[r4], 15 \n\t"
+ "shra_r.w %[r5], %[r5], 15 \n\t"
+ "shra_r.w %[r6], %[r6], 15 \n\t"
+ "sll %[r0], %[r0], 16 \n\t"
+ "sll %[r2], %[r2], 16 \n\t"
+ "sll %[r4], %[r4], 16 \n\t"
+ "sll %[r6], %[r6], 16 \n\t"
+ "packrl.ph %[r0], %[r8], %[r0] \n\t"
+ "packrl.ph %[r2], %[r1], %[r2] \n\t"
+ "packrl.ph %[r4], %[r3], %[r4] \n\t"
+ "packrl.ph %[r6], %[r5], %[r6] \n\t"
+ "sw %[r0], 0(%[outBuf]) \n\t"
+ "sw %[r2], 4(%[outBuf]) \n\t"
+ "sw %[r4], 8(%[outBuf]) \n\t"
+ "sw %[r6], 12(%[outBuf]) \n\t"
+#endif
+ "addiu %[window], %[window], 16 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 16 \n\t"
+ "addiu %[outBuf], %[outBuf], 16 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "2: \n\t"
+ "andi %[after], %[anaLen], 7 \n\t"
+ "3: \n\t"
+ "blez %[after], 4f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 2 \n\t"
+ "addiu %[outBuf], %[outBuf], 2 \n\t"
+ "shra_r.w %[r0], %[r0], 14 \n\t"
+ "sh %[r0], -2(%[outBuf]) \n\t"
+ "b 3b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
+ [iters] "=&r" (iters), [after] "=&r" (after),
+ [window] "+r" (window),[anaBuf] "+r" (anaBuf),
+ [outBuf] "+r" (outBuf)
+ : [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+#else
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "sra %[iters], %[anaLen], 2 \n\t"
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[anaBuf]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[anaBuf]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 8 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "sh %[r0], 0(%[outBuf]) \n\t"
+ "sh %[r2], 2(%[outBuf]) \n\t"
+ "sh %[r4], 4(%[outBuf]) \n\t"
+ "sh %[r6], 6(%[outBuf]) \n\t"
+ "addiu %[outBuf], %[outBuf], 8 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "2: \n\t"
+ "andi %[after], %[anaLen], 3 \n\t"
+ "3: \n\t"
+ "blez %[after], 4f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[anaBuf]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[anaBuf], %[anaBuf], 2 \n\t"
+ "addiu %[outBuf], %[outBuf], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sh %[r0], -2(%[outBuf]) \n\t"
+ "b 3b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
+ [after] "=&r" (after), [window] "+r" (window),
+ [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
+ : [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+#endif
+}
+
+// For the noise supression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst,
+ int16_t* out_frame,
+ int16_t gain_factor) {
+
+ int iters = inst->blockLen10ms >> 2;
+ int after = inst->blockLen10ms & 3;
+ int r0, r1, r2, r3, r4, r5, r6, r7;
+ int16_t *window = (int16_t*)inst->window;
+ int16_t *real = inst->real;
+ int16_t *synthBuf = inst->synthesisBuffer;
+ int16_t *out = out_frame;
+ int sat_pos = 0x7fff;
+ int sat_neg = 0xffff8000;
+ int block10 = (int)inst->blockLen10ms;
+ int anaLen = (int)inst->anaLen;
+
+ __asm __volatile(
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "1: \n\t"
+ "blez %[iters], 2f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[real]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[real]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "mul %[r2], %[r2], %[gain_factor] \n\t"
+ "mul %[r4], %[r4], %[gain_factor] \n\t"
+ "mul %[r6], %[r6], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "addiu %[r2], %[r2], 0x1000 \n\t"
+ "addiu %[r4], %[r4], 0x1000 \n\t"
+ "addiu %[r6], %[r6], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "sra %[r2], %[r2], 13 \n\t"
+ "sra %[r4], %[r4], 13 \n\t"
+ "sra %[r6], %[r6], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "lh %[r3], 2(%[synthBuf]) \n\t"
+ "lh %[r5], 4(%[synthBuf]) \n\t"
+ "lh %[r7], 6(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "addu %[r2], %[r2], %[r3] \n\t"
+ "addu %[r4], %[r4], %[r5] \n\t"
+ "addu %[r6], %[r6], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "slt %[r3], %[r2], %[sat_neg] \n\t"
+ "slt %[r5], %[r4], %[sat_neg] \n\t"
+ "slt %[r7], %[r6], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "movn %[r2], %[sat_neg], %[r3] \n\t"
+ "movn %[r4], %[sat_neg], %[r5] \n\t"
+ "movn %[r6], %[sat_neg], %[r7] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r2], 2(%[synthBuf]) \n\t"
+ "sh %[r4], 4(%[synthBuf]) \n\t"
+ "sh %[r6], 6(%[synthBuf]) \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r2], 2(%[out]) \n\t"
+ "sh %[r4], 4(%[out]) \n\t"
+ "sh %[r6], 6(%[out]) \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[real], %[real], 8 \n\t"
+ "addiu %[synthBuf],%[synthBuf], 8 \n\t"
+ "addiu %[out], %[out], 8 \n\t"
+ "b 1b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "2: \n\t"
+ "blez %[after], 3f \n\t"
+ " subu %[block10], %[anaLen], %[block10] \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "addiu %[synthBuf],%[synthBuf], 2 \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "b 2b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "3: \n\t"
+ "sra %[iters], %[block10], 2 \n\t"
+ "4: \n\t"
+ "blez %[iters], 5f \n\t"
+ " andi %[after], %[block10], 3 \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "lh %[r2], 2(%[window]) \n\t"
+ "lh %[r3], 2(%[real]) \n\t"
+ "lh %[r4], 4(%[window]) \n\t"
+ "lh %[r5], 4(%[real]) \n\t"
+ "lh %[r6], 6(%[window]) \n\t"
+ "lh %[r7], 6(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "mul %[r2], %[r2], %[r3] \n\t"
+ "mul %[r4], %[r4], %[r5] \n\t"
+ "mul %[r6], %[r6], %[r7] \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "addiu %[r2], %[r2], 0x2000 \n\t"
+ "addiu %[r4], %[r4], 0x2000 \n\t"
+ "addiu %[r6], %[r6], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "sra %[r2], %[r2], 14 \n\t"
+ "sra %[r4], %[r4], 14 \n\t"
+ "sra %[r6], %[r6], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "mul %[r2], %[r2], %[gain_factor] \n\t"
+ "mul %[r4], %[r4], %[gain_factor] \n\t"
+ "mul %[r6], %[r6], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "addiu %[r2], %[r2], 0x1000 \n\t"
+ "addiu %[r4], %[r4], 0x1000 \n\t"
+ "addiu %[r6], %[r6], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "sra %[r2], %[r2], 13 \n\t"
+ "sra %[r4], %[r4], 13 \n\t"
+ "sra %[r6], %[r6], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "lh %[r3], 2(%[synthBuf]) \n\t"
+ "lh %[r5], 4(%[synthBuf]) \n\t"
+ "lh %[r7], 6(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "addu %[r2], %[r2], %[r3] \n\t"
+ "addu %[r4], %[r4], %[r5] \n\t"
+ "addu %[r6], %[r6], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "slt %[r3], %[r2], %[sat_pos] \n\t"
+ "slt %[r5], %[r4], %[sat_pos] \n\t"
+ "slt %[r7], %[r6], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "movz %[r2], %[sat_pos], %[r3] \n\t"
+ "movz %[r4], %[sat_pos], %[r5] \n\t"
+ "movz %[r6], %[sat_pos], %[r7] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "slt %[r3], %[r2], %[sat_neg] \n\t"
+ "slt %[r5], %[r4], %[sat_neg] \n\t"
+ "slt %[r7], %[r6], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "movn %[r2], %[sat_neg], %[r3] \n\t"
+ "movn %[r4], %[sat_neg], %[r5] \n\t"
+ "movn %[r6], %[sat_neg], %[r7] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "sh %[r2], 2(%[synthBuf]) \n\t"
+ "sh %[r4], 4(%[synthBuf]) \n\t"
+ "sh %[r6], 6(%[synthBuf]) \n\t"
+ "addiu %[window], %[window], 8 \n\t"
+ "addiu %[real], %[real], 8 \n\t"
+ "addiu %[synthBuf],%[synthBuf], 8 \n\t"
+ "b 4b \n\t"
+ " addiu %[iters], %[iters], -1 \n\t"
+ "5: \n\t"
+ "blez %[after], 6f \n\t"
+ " nop \n\t"
+ "lh %[r0], 0(%[window]) \n\t"
+ "lh %[r1], 0(%[real]) \n\t"
+ "mul %[r0], %[r0], %[r1] \n\t"
+ "addiu %[window], %[window], 2 \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[r0], %[r0], 0x2000 \n\t"
+ "sra %[r0], %[r0], 14 \n\t"
+ "mul %[r0], %[r0], %[gain_factor] \n\t"
+ "addiu %[r0], %[r0], 0x1000 \n\t"
+ "sra %[r0], %[r0], 13 \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "lh %[r1], 0(%[synthBuf]) \n\t"
+ "addu %[r0], %[r0], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_pos] \n\t"
+ "movz %[r0], %[sat_pos], %[r1] \n\t"
+ "slt %[r1], %[r0], %[sat_neg] \n\t"
+ "movn %[r0], %[sat_neg], %[r1] \n\t"
+ "sh %[r0], 0(%[synthBuf]) \n\t"
+ "addiu %[synthBuf],%[synthBuf], 2 \n\t"
+ "b 2b \n\t"
+ " addiu %[after], %[after], -1 \n\t"
+ "6: \n\t"
+ ".set pop \n\t"
+ : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
+ [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+ [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
+ [after] "+r" (after), [block10] "+r" (block10),
+ [window] "+r" (window), [real] "+r" (real),
+ [synthBuf] "+r" (synthBuf), [out] "+r" (out)
+ : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
+ [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
+ : "memory", "hi", "lo"
+ );
+
+ // update synthesis buffer
+ WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer,
+ inst->synthesisBuffer + inst->blockLen10ms,
+ inst->anaLen - inst->blockLen10ms);
+ WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
+ + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buf) {
+
+ uint16_t *noiseSupFilter = inst->noiseSupFilter;
+ int16_t *real = inst->real;
+ int16_t *imag = inst->imag;
+ int32_t loop_count = 2;
+ int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
+ int16_t tmp16 = (inst->anaLen << 1) - 4;
+ int16_t* freq_buf_f = freq_buf;
+ int16_t* freq_buf_s = &freq_buf[tmp16];
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ //first sample
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "addiu %[real], %[real], 2 \n\t"
+ "addiu %[imag], %[imag], 2 \n\t"
+ "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t"
+ "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t"
+ "1: \n\t"
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_5], 2(%[real]) \n\t"
+ "lh %[tmp_6], 2(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
+ "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
+ "addiu %[loop_count], %[loop_count], 2 \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sra %[tmp_5], %[tmp_5], 14 \n\t"
+ "sra %[tmp_6], %[tmp_6], 14 \n\t"
+ "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_5], 2(%[real]) \n\t"
+ "sh %[tmp_5], 0(%[freq_buf_s]) \n\t"
+ "sh %[tmp_6], 2(%[imag]) \n\t"
+ "sh %[tmp_6], 2(%[freq_buf_s]) \n\t"
+ "negu %[tmp_6], %[tmp_6] \n\t"
+ "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t"
+ "addiu %[real], %[real], 4 \n\t"
+ "addiu %[imag], %[imag], 4 \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
+ "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"
+ "blt %[loop_count], %[loop_size], 1b \n\t"
+ " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t"
+ //last two samples:
+ "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_2], 0(%[real]) \n\t"
+ "lh %[tmp_3], 0(%[imag]) \n\t"
+ "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
+ "lh %[tmp_5], 2(%[real]) \n\t"
+ "lh %[tmp_6], 2(%[imag]) \n\t"
+ "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
+ "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
+ "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
+ "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
+ "sra %[tmp_2], %[tmp_2], 14 \n\t"
+ "sra %[tmp_3], %[tmp_3], 14 \n\t"
+ "sra %[tmp_5], %[tmp_5], 14 \n\t"
+ "sra %[tmp_6], %[tmp_6], 14 \n\t"
+ "sh %[tmp_2], 0(%[real]) \n\t"
+ "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
+ "sh %[tmp_3], 0(%[imag]) \n\t"
+ "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
+ "negu %[tmp_3], %[tmp_3] \n\t"
+ "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
+ "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
+ "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"
+ "sh %[tmp_5], 2(%[real]) \n\t"
+ "sh %[tmp_6], 2(%[imag]) \n\t"
+ ".set pop \n\t"
+ : [real] "+r" (real), [imag] "+r" (imag),
+ [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
+ [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
+ [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
+ [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
+ : [loop_size] "r" (inst->anaLen2)
+ : "memory", "hi", "lo"
+ );
+}
+
+#if defined(MIPS_DSP_R1_LE)
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor) {
+ int32_t r0, r1, r2, r3, t0;
+ int len = inst->anaLen;
+ int16_t *out = &inst->real[0];
+ int shift = factor - inst->normData;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "bltz %[shift], 4f \n\t"
+ " sra %[t0], %[len], 2 \n\t"
+ "beqz %[t0], 2f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ "1: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
+ "shllv_s.ph %[r1], %[r1], %[shift] \n\t"
+ "shllv_s.ph %[r2], %[r2], %[shift] \n\t"
+ "shllv_s.ph %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 1b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "2: \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "3: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 3b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "b 8f \n\t"
+ "4: \n\t"
+ "negu %[shift], %[shift] \n\t"
+ "beqz %[t0], 6f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ "5: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "srav %[r0], %[r0], %[shift] \n\t"
+ "srav %[r1], %[r1], %[shift] \n\t"
+ "srav %[r2], %[r2], %[shift] \n\t"
+ "srav %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 5b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "6: \n\t"
+ "beqz %[len], 8f \n\t"
+ " nop \n\t"
+ "7: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "srav %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 7b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "8: \n\t"
+ ".set pop \n\t"
+ : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+ [r2] "=&r" (r2), [r3] "=&r" (r3)
+ : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
+ [out] "r" (out)
+ : "memory"
+ );
+}
+#endif
+
+// Normalize the real-valued signal |in|, the input to forward FFT.
+void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst,
+ const int16_t* in,
+ int16_t* out) {
+ int32_t r0, r1, r2, r3, t0;
+ int len = inst->anaLen;
+ int shift = inst->normData;
+
+ __asm __volatile (
+ ".set push \n\t"
+ ".set noreorder \n\t"
+ "beqz %[len], 4f \n\t"
+ " sra %[t0], %[len], 2 \n\t"
+ "beqz %[t0], 2f \n\t"
+ " andi %[len], %[len], 3 \n\t"
+ "1: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "lh %[r1], 2(%[in]) \n\t"
+ "lh %[r2], 4(%[in]) \n\t"
+ "lh %[r3], 6(%[in]) \n\t"
+ "sllv %[r0], %[r0], %[shift] \n\t"
+ "sllv %[r1], %[r1], %[shift] \n\t"
+ "sllv %[r2], %[r2], %[shift] \n\t"
+ "sllv %[r3], %[r3], %[shift] \n\t"
+ "addiu %[in], %[in], 8 \n\t"
+ "addiu %[t0], %[t0], -1 \n\t"
+ "sh %[r0], 0(%[out]) \n\t"
+ "sh %[r1], 2(%[out]) \n\t"
+ "sh %[r2], 4(%[out]) \n\t"
+ "sh %[r3], 6(%[out]) \n\t"
+ "bgtz %[t0], 1b \n\t"
+ " addiu %[out], %[out], 8 \n\t"
+ "2: \n\t"
+ "beqz %[len], 4f \n\t"
+ " nop \n\t"
+ "3: \n\t"
+ "lh %[r0], 0(%[in]) \n\t"
+ "addiu %[in], %[in], 2 \n\t"
+ "addiu %[len], %[len], -1 \n\t"
+ "sllv %[r0], %[r0], %[shift] \n\t"
+ "addiu %[out], %[out], 2 \n\t"
+ "bgtz %[len], 3b \n\t"
+ " sh %[r0], -2(%[out]) \n\t"
+ "4: \n\t"
+ ".set pop \n\t"
+ : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+ [r2] "=&r" (r2), [r3] "=&r" (r3)
+ : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
+ [out] "r" (out)
+ : "memory"
+ );
+}
+