| /* |
| * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "webrtc/modules/audio_coding/codecs/isac/fix/source/lpc_masking_model.h" |
| |
// MIPS DSPR2 optimization of WebRtcIsacfix_CalculateResidualEnergy().
// Bit-exact with WebRtcIsacfix_CalculateResidualEnergyC() in
// lpc_masking_model.c.
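//
// For reference, the quantity computed here (with dynamic down-scaling by
// shift_internal to keep the 64-bit accumulation from overflowing) is:
//
//   residual_energy =
//       sum_{i=0..lpc_order} sum_{j=i..lpc_order}
//           a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i]
//           * (i == 0 ? 1 : 2)
//
// The i == 0 terms are counted once and the i > 0 (off-diagonal) terms
// twice, matching the per-loop comments below.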
| int32_t WebRtcIsacfix_CalculateResidualEnergyMIPS(int lpc_order, |
| int32_t q_val_corr, |
| int q_val_polynomial, |
| int16_t* a_polynomial, |
| int32_t* corr_coeffs, |
| int* q_val_residual_energy) { |
| |
| int i = 0, j = 0; |
| int shift_internal = 0, shift_norm = 0; |
| int32_t tmp32 = 0, word32_high = 0, word32_low = 0, residual_energy = 0; |
| int32_t tmp_corr_c = corr_coeffs[0]; |
| int16_t* tmp_a_poly = &a_polynomial[0]; |
| int32_t sum64_hi = 0; |
| int32_t sum64_lo = 0; |
| |
| for (j = 0; j <= lpc_order; j++) { |
| // For the case of i == 0: |
| // residual_energy += |
| // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i]; |
| |
| int32_t tmp2, tmp3; |
| int16_t sign_1; |
| int16_t sign_2; |
| int16_t sign_3; |
| |
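    // Load a_polynomial[j], square it, and multiply the square by
    // corr_coeffs[0] into the 64-bit accumulator $ac0. shilov applies the
    // down-scaling accumulated in shift_internal, mfhi/mflo split the result
    // into tmp2 (high word) and tmp3 (low word), and the sign bits of tmp2
    // and of the running sum are extracted for the overflow test below.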
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "lh %[tmp2], 0(%[tmp_a_poly]) \n\t" |
| "mul %[tmp32], %[tmp2], %[tmp2] \n\t" |
| "addiu %[tmp_a_poly], %[tmp_a_poly], 2 \n\t" |
| "sra %[sign_2], %[sum64_hi], 31 \n\t" |
| "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t" |
| "shilov $ac0, %[shift_internal] \n\t" |
| "mfhi %[tmp2], $ac0 \n\t" |
| "mflo %[tmp3], $ac0 \n\t" |
| "sra %[sign_1], %[tmp2], 31 \n\t" |
| "xor %[sign_3], %[sign_1], %[sign_2] \n\t" |
| ".set pop \n\t" |
| : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32), |
| [tmp_a_poly] "+r" (tmp_a_poly), [sign_1] "=&r" (sign_1), |
| [sign_3] "=&r" (sign_3), [sign_2] "=&r" (sign_2), |
| [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) |
| : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal) |
| : "hi", "lo", "memory" |
| ); |
| |
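    // sign_3 != 0 means the product and the running sum have opposite signs,
    // so the 64-bit addition cannot overflow; addsc/addwc propagate the
    // low-word carry through the DSPControl carry bit.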
| if (sign_3 != 0) { |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" |
| "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" |
| ".set pop \n\t" |
| : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) |
| : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3) |
| : "hi", "lo", "memory" |
| ); |
| } else { |
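      // Test overflow and sum the result.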
| if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) || |
| ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) { |
| // Shift right for overflow. |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "addiu %[shift_internal], %[shift_internal], 1 \n\t" |
| "prepend %[sum64_lo], %[sum64_hi], 1 \n\t" |
| "sra %[sum64_hi], %[sum64_hi], 1 \n\t" |
| "prepend %[tmp3], %[tmp2], 1 \n\t" |
| "sra %[tmp2], %[tmp2], 1 \n\t" |
| "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" |
| "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" |
| ".set pop \n\t" |
| : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), |
| [shift_internal] "+r" (shift_internal), |
| [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) |
| : |
| : "hi", "lo", "memory" |
| ); |
| } else { |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" |
| "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" |
| ".set pop \n\t" |
| : [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) |
| : [tmp2] "r" (tmp2), [tmp3] "r" (tmp3) |
| : "hi", "lo", "memory" |
| ); |
| } |
| } |
| } |
| |
| for (i = 1; i <= lpc_order; i++) { |
| tmp_corr_c = corr_coeffs[i]; |
| int16_t* tmp_a_poly_j = &a_polynomial[i]; |
| int16_t* tmp_a_poly_j_i = &a_polynomial[0]; |
| for (j = i; j <= lpc_order; j++) { |
| // For the case of i = 1 .. lpc_order: |
| // residual_energy += |
| // a_polynomial[j] * corr_coeffs[i] * a_polynomial[j - i] * 2; |
| |
| int32_t tmp2, tmp3; |
| int16_t sign_1; |
| int16_t sign_2; |
| int16_t sign_3; |
| |
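      // Load a_polynomial[j] and a_polynomial[j - i], double their product
      // (the factor of 2 on the off-diagonal terms), and multiply it by
      // corr_coeffs[i] into $ac0. As in the first loop, shilov applies
      // shift_internal before the 64-bit result is split into tmp2/tmp3 and
      // the sign bits are computed.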
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "lh %[tmp3], 0(%[tmp_a_poly_j]) \n\t" |
| "lh %[tmp2], 0(%[tmp_a_poly_j_i]) \n\t" |
| "addiu %[tmp_a_poly_j], %[tmp_a_poly_j], 2 \n\t" |
| "addiu %[tmp_a_poly_j_i], %[tmp_a_poly_j_i], 2 \n\t" |
| "mul %[tmp32], %[tmp3], %[tmp2] \n\t" |
| "sll %[tmp32], %[tmp32], 1 \n\t" |
| "mult $ac0, %[tmp32], %[tmp_corr_c] \n\t" |
| "shilov $ac0, %[shift_internal] \n\t" |
| "mfhi %[tmp2], $ac0 \n\t" |
| "mflo %[tmp3], $ac0 \n\t" |
| "sra %[sign_1], %[tmp2], 31 \n\t" |
| "sra %[sign_2], %[sum64_hi], 31 \n\t" |
| "xor %[sign_3], %[sign_1], %[sign_2] \n\t" |
| ".set pop \n\t" |
| : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), [tmp32] "=&r" (tmp32), |
| [tmp_a_poly_j] "+r" (tmp_a_poly_j), [sign_1] "=&r" (sign_1), |
| [tmp_a_poly_j_i] "+r" (tmp_a_poly_j_i), [sign_2] "=&r" (sign_2), |
| [sign_3] "=&r" (sign_3), [sum64_hi] "+r" (sum64_hi), |
| [sum64_lo] "+r" (sum64_lo) |
| : [tmp_corr_c] "r" (tmp_corr_c), [shift_internal] "r" (shift_internal) |
| : "hi", "lo", "memory" |
| ); |
| if (sign_3 != 0) { |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" |
| "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" |
| ".set pop \n\t" |
| : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), [sum64_hi] "+r" (sum64_hi), |
| [sum64_lo] "+r" (sum64_lo) |
| : |
| :"memory" |
| ); |
| } else { |
| // Test overflow and sum the result. |
| if (((!(sign_1 || sign_2)) && (0x7FFFFFFF - sum64_hi < tmp2)) || |
| ((sign_1 && sign_2) && (sum64_hi + tmp2 > 0))) { |
| // Shift right for overflow. |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "addiu %[shift_internal], %[shift_internal], 1 \n\t" |
| "prepend %[sum64_lo], %[sum64_hi], 1 \n\t" |
| "sra %[sum64_hi], %[sum64_hi], 1 \n\t" |
| "prepend %[tmp3], %[tmp2], 1 \n\t" |
| "sra %[tmp2], %[tmp2], 1 \n\t" |
| "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" |
| "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" |
| ".set pop \n\t" |
| : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), |
| [shift_internal] "+r" (shift_internal), |
| [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) |
| : |
| : "hi", "lo", "memory" |
| ); |
| } else { |
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "addsc %[sum64_lo], %[sum64_lo], %[tmp3] \n\t" |
| "addwc %[sum64_hi], %[sum64_hi], %[tmp2] \n\t" |
| ".set pop \n\t" |
| : [tmp2] "+r" (tmp2), [tmp3] "+r" (tmp3), |
| [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) |
| : |
| : "hi", "lo", "memory" |
| ); |
| } |
| } |
| } |
| } |
| word32_high = sum64_hi; |
| word32_low = sum64_lo; |
| |
  // Calculate the normalization shift (shift_norm) for the 64-bit sum.
| if (word32_high != 0) { |
| shift_norm = 32 - WebRtcSpl_NormW32(word32_high); |
| int tmp1; |
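    // Compose the normalized 32-bit result from both halves of the sum:
    //   residual_energy =
    //       (sum64_lo >> shift_norm) | (sum64_hi << (32 - shift_norm));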
| __asm __volatile ( |
| ".set push \n\t" |
| ".set noreorder \n\t" |
| "srl %[residual_energy], %[sum64_lo], %[shift_norm] \n\t" |
| "li %[tmp1], 32 \n\t" |
| "subu %[tmp1], %[tmp1], %[shift_norm] \n\t" |
| "sll %[tmp1], %[sum64_hi], %[tmp1] \n\t" |
| "or %[residual_energy], %[residual_energy], %[tmp1] \n\t" |
| ".set pop \n\t" |
| : [residual_energy] "=&r" (residual_energy), [tmp1]"=&r"(tmp1), |
| [sum64_hi] "+r" (sum64_hi), [sum64_lo] "+r" (sum64_lo) |
| : [shift_norm] "r" (shift_norm) |
| : "memory" |
| ); |
| } else { |
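    // The high word is zero, so only word32_low needs normalizing. If its
    // sign bit is set, one right shift keeps the result a positive int32;
    // otherwise shift left and record the shift as negative.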
| if ((word32_low & 0x80000000) != 0) { |
| shift_norm = 1; |
| residual_energy = (uint32_t)word32_low >> 1; |
| } else { |
| shift_norm = WebRtcSpl_NormW32(word32_low); |
| residual_energy = word32_low << shift_norm; |
| shift_norm = -shift_norm; |
| } |
| } |
| |
| // Q(q_val_polynomial * 2) * Q(q_val_corr) >> shift_internal >> shift_norm |
| // = Q(q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2) |
| *q_val_residual_energy = |
| q_val_corr - shift_internal - shift_norm + q_val_polynomial * 2; |
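  // For example (illustrative values only): with q_val_corr = 35,
  // q_val_polynomial = 12, shift_internal = 3 and shift_norm = 5, the
  // returned energy is in Q(35 - 3 - 5 + 2 * 12) = Q51.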
| |
| return residual_energy; |
| } |