/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h"
// MIPS DSPR2 optimization for function WebRtcIsacfix_CalculateResidualEnergy
// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC from file
// lpc_masking_model.c
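//
// The function evaluates, in fixed point,
//   residual_energy = sum_{i=0..order} sum_{j=i..order}
//                       a[j] * corr[i] * a[j - i] * (i == 0 ? 1 : 2),
// keeping the running sum in a software 64-bit accumulator
// (sum64_hi:sum64_lo) that is shifted right (shift_internal) whenever an
// addition is about to overflow. As a rough portable-C sketch of the
// accumulation (an illustration only, not the bit-exact C version):
//
//   int64_t sum64 = 0;
//   for (i = 0; i <= lpc_order; i++) {
//     for (j = i; j <= lpc_order; j++) {
//       int32_t prod = a_polynomial[j] * a_polynomial[j - i];
//       if (i != 0) prod <<= 1;  // Off-diagonal terms are counted twice.
//       sum64 += ((int64_t)prod * corr_coeffs[i]) >> shift_internal;
//       // On overflow: sum64 >>= 1; ++shift_internal;
//     }
//   }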
int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order,
int32_t q_val_corr,
int q_val_polynomial,
int16_t* a_polynomial,
int32_t* corr_coeffs,
int* q_val_residual_energy) {
int i = 0, j = 0;
int shift_internal = 0, shift_norm = 0;
int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0;
int32_t tmp_corr_c = corr_coeffs[0];
int16_t* tmp_a_poly = &a_polynomial[0];
int32_t sum64_hi = 0;
int32_t sum64_lo = 0;
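  // The 64-bit running sum lives in the sum64_hi:sum64_lo register pair.
  // In the loops below, sign_1 holds the sign bit of the new term's high
  // word, sign_2 the sign bit of the running sum, and sign_3 flags whether
  // the two differ.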
for (j = 0; j <= lpc_order; j++) {
// For the case of i == 0:
// residual_energy +=
// a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i];
int32_t tmp2, tmp3;
int16_t sign_1;
int16_t sign_2;
int16_t sign_3;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lh %[tmp2], 0(%[tmp_a_poly]) \n\t"
"mul %[tmp32], %[tmp2], %[tmp2] \n\t"
"addiu %[tmp_a_poly], %[tmp_a_poly], 2 \n\t"
"sra %[sign_2], %[sum64_hi], 31 \n\t"
"mult $ac0, %[tmp32], %[tmp_corr_c] \n\t"
"shilov $ac0, %[shift_internal] \n\t"
"mfhi %[tmp2], $ac0 \n\t"
"mflo %[tmp3], $ac0 \n\t"
"sra %[sign_1], %[tmp2], 31 \n\t"
"xor %[sign_3], %[sign_1], %[sign_2] \n\t"
".set pop \n\t"
: [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
[tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1),
[sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2),
[sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
: [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
: "hi", "lo", "memory"
);
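    // A term with the opposite sign of the running sum cannot overflow the
    // addition, so it is summed directly.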
if (sign_3 != 0) {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
"addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
".set pop \n\t"
: [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
: [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
: "hi", "lo", "memory"
);
} else {
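      // Same signs: test for overflow before summing. With both operands
      // non-negative, overflow shows as 0x7FFFFFFF - sum64_hi < tmp2; with
      // both negative, as the sum of the high words wrapping to a positive
      // value.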
if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
// Shift right for overflow.
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[shift_internal], %[shift_internal], 1 \n\t"
"prepend %[sum64_lo], %[sum64_hi], 1 \n\t"
"sra %[sum64_hi], %[sum64_hi], 1 \n\t"
"prepend %[tmp3], %[tmp2], 1 \n\t"
"sra %[tmp2], %[tmp2], 1 \n\t"
"addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
"addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
".set pop \n\t"
: [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
[shift_internal] "+r" (shift_internal),
[sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
:
: "hi", "lo", "memory"
);
} else {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
"addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
".set pop \n\t"
: [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
: [tmp2] "r" (tmp2), [tmp3] "r" (tmp3)
: "hi", "lo", "memory"
);
}
}
}
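  // Second pass: the off-diagonal terms (i = 1 .. lpc_order). Each of these
  // appears twice in the quadratic form, hence the extra left shift by one
  // in the assembly below.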
for (i = 1; i <= lpc_order; i++) {
tmp_corr_c = corr_coeffs[i];
int16_t* tmp_a_poly_j = &a_polynomial[i];
int16_t* tmp_a_poly_j_i = &a_polynomial[0];
for (j = i; j <= lpc_order; j++) {
// For the case of i = 1 .. lpc_order:
// residual_energy +=
// a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2;
int32_t tmp2, tmp3;
int16_t sign_1;
int16_t sign_2;
int16_t sign_3;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lh %[tmp3], 0(%[tmp_a_poly_j]) \n\t"
"lh %[tmp2], 0(%[tmp_a_poly_j_i]) \n\t"
"addiu %[tmp_a_poly_j], %[tmp_a_poly_j], 2 \n\t"
"addiu %[tmp_a_poly_j_i], %[tmp_a_poly_j_i], 2 \n\t"
"mul %[tmp32], %[tmp3], %[tmp2] \n\t"
"sll %[tmp32], %[tmp32], 1 \n\t"
"mult $ac0, %[tmp32], %[tmp_corr_c] \n\t"
"shilov $ac0, %[shift_internal] \n\t"
"mfhi %[tmp2], $ac0 \n\t"
"mflo %[tmp3], $ac0 \n\t"
"sra %[sign_1], %[tmp2], 31 \n\t"
"sra %[sign_2], %[sum64_hi], 31 \n\t"
"xor %[sign_3], %[sign_1], %[sign_2] \n\t"
".set pop \n\t"
: [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32),
[tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1),
[tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2),
[sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi),
[sum64_lo] "+r" (sum64_lo)
: [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal)
: "hi", "lo", "memory"
);
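      // As in the first loop: opposite signs cannot overflow, so the term
      // is summed directly.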
if (sign_3 != 0) {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
"addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
".set pop \n\t"
: [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi),
[sum64_lo] "+r" (sum64_lo)
:
:"memory"
);
} else {
// Test overflow and sum the result.
if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) ||
((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) {
// Shift right for overflow.
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[shift_internal], %[shift_internal], 1 \n\t"
"prepend %[sum64_lo], %[sum64_hi], 1 \n\t"
"sra %[sum64_hi], %[sum64_hi], 1 \n\t"
"prepend %[tmp3], %[tmp2], 1 \n\t"
"sra %[tmp2], %[tmp2], 1 \n\t"
"addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
"addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
".set pop \n\t"
: [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
[shift_internal] "+r" (shift_internal),
[sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
:
: "hi", "lo", "memory"
);
} else {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t"
"addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t"
".set pop \n\t"
: [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3),
[sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
:
: "hi", "lo", "memory"
);
}
}
}
}
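  // Reduce the 64-bit sum to a 32-bit result (residual_energy) plus an
  // exponent (shift_norm) that is folded into the returned Q value.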
word32_high = sum64_hi;
word32_low = sum64_lo;
// Calculate the value of shifting (shift_norm) for the 64-bit sum.
if (word32_high != 0) {
shift_norm = 32 - WebRtcSpl_NormW32(word32_high);
int tmp1;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"srl %[residual_energy], %[sum64_lo], %[shift_norm] \n\t"
"li %[tmp1], 32 \n\t"
"subu %[tmp1], %[tmp1], %[shift_norm] \n\t"
"sll %[tmp1], %[sum64_hi], %[tmp1] \n\t"
"or %[residual_energy], %[residual_energy], %[tmp1] \n\t"
".set pop \n\t"
: [residual_energy] "=&r" (residual_energy), [tmp1]"=&r"(tmp1),
[sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo)
: [shift_norm] "r" (shift_norm)
: "memory"
);
} else {
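    // The high word is zero, so only the low word is normalized. If its top
    // bit is set it would read as negative in two's complement, so it is
    // shifted right once instead of being normalized with
    // WebRtcSpl_NormW32().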
if ((word32_low & 0x80000000) != 0) {
shift_norm = 1;
residual_energy = (uint32_t)word32_low >> 1;
} else {
shift_norm = WebRtcSpl_NormW32(word32_low);
residual_energy = word32_low << shift_norm;
shift_norm = -shift_norm;
}
}
// Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm
// = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2)
*q_val_residual_energy =
q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2;
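  // For example (illustrative values only): with q_val_corr = 12,
  // q_val_polynomial = 4, shift_internal = 2 and shift_norm = 5, the
  // returned energy is in Q(12 - 2 - 5 + 8) = Q13.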
return residual_energy;
}