/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
| 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 12 | |
// Version of WebRtcSpl_DownsampleFast() for MIPS platforms.
//
// Applies the FIR filter |coefficients| to |data_in| and stores one filtered
// sample for every |factor| input samples. Output sample n is
//
//   sat16((sum_{j = 0}^{coefficients_length - 1}
//              coefficients[j] * data_in[delay + n * factor - j] + 2048) >> 12)
//
// i.e. the products are accumulated in 32 bits, rounded, shifted right by 12
// and saturated to the int16_t range.
//
// Parameters:
//   data_in             - input samples.
//   data_in_length      - number of samples in |data_in|; must be at least
//                         delay + factor * (data_out_length - 1) + 1.
//   data_out            - receives |data_out_length| output samples.
//   data_out_length     - number of output samples to produce (> 0).
//   coefficients        - filter taps.
//   coefficients_length - number of taps (> 0).
//   factor              - decimation factor (> 0).
//   delay               - index in |data_in| of the newest sample used for the
//                         first output. The filter reads backwards from there,
//                         so the caller must guarantee
//                         delay >= coefficients_length - 1; this is NOT
//                         checked here.
//
// Returns 0 on success, -1 if any of the checked conditions above is violated.
int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
                                  size_t data_in_length,
                                  int16_t* data_out,
                                  size_t data_out_length,
                                  const int16_t* __restrict coefficients,
                                  size_t coefficients_length,
                                  int factor,
                                  size_t delay) {
  int i;
  int j;
  int k;
  int32_t out_s32 = 0;
  size_t endpos;

  int32_t tmp1, tmp2, tmp3, tmp4, factor_2;
  // The cursors are only ever read through, so keep them const-qualified
  // instead of casting the qualifier away.
  const int16_t* p_coefficients;
  const int16_t* p_data_in;
  const int16_t* p_data_in_0;
  const int16_t* p_coefficients_0 = coefficients;
#if !defined(MIPS_DSP_R1_LE)
  // Saturation bounds for the plain MIPS32 path (no saturating shift there).
  int32_t max_16 = 0x7FFF;
  int32_t min_16 = -0x8000;
#endif  // #if !defined(MIPS_DSP_R1_LE)

  // Return error if any of the running conditions doesn't meet.
  // A non-positive |factor| is rejected as well: the loop counter in the
  // assembly below is decremented by |factor| and would never reach zero,
  // so the loop would run forever and write past |data_out|.
  if (data_out_length == 0 || coefficients_length == 0 || factor <= 0) {
    return -1;
  }
  // One past the last input index touched by the final output sample.
  endpos = delay + (size_t)factor * (data_out_length - 1) + 1;
  if (data_in_length < endpos) {
    return -1;
  }

  // Only form the pointer once |delay| is known to lie inside |data_in|.
  p_data_in_0 = &data_in[delay];

  // Notes that apply to both assembly variants:
  //  * ".set noreorder" is active, so the instruction written with a leading
  //    space after each branch occupies that branch's delay slot.
  //  * |i| starts at endpos - delay and is decreased by |factor| per output
  //    sample; the outer loop (label 1) runs while it stays positive.
  //  * |data_out| is advanced inside the assembly, therefore it is declared
  //    as a read-write ("+r") operand rather than an input.
#if defined(MIPS_DSP_R2_LE)
  __asm __volatile (
    ".set        push                                          \n\t"
    ".set        noreorder                                     \n\t"
    "subu        %[i],           %[endpos],       %[delay]     \n\t"
    // Byte stride between consecutive output positions (int16_t samples).
    "sll         %[factor_2],    %[factor],       1            \n\t"
   "1:                                                         \n\t"
    "move        %[p_data_in],   %[p_data_in_0]                \n\t"
    // Clear the hi/lo accumulator ($ac0).
    "mult        $zero,          $zero                         \n\t"
    "move        %[p_coefs],     %[p_coefs_0]                  \n\t"
    // j = number of 4-tap groups, k = remaining taps (0..3).
    "sra         %[j],           %[coef_length],  2            \n\t"
    "beq         %[j],           $zero,           3f           \n\t"
    " andi       %[k],           %[coef_length],  3            \n\t"
    // Four taps per iteration. lwl/lwr perform unaligned word loads that
    // fetch two int16 values per register; samples are walked backwards and
    // coefficients forwards, so packrl.ph swaps the two sample halfwords to
    // pair each sample with its coefficient before the dual multiply-add.
   "2:                                                         \n\t"
    "lwl         %[tmp1],        1(%[p_data_in])               \n\t"
    "lwl         %[tmp2],        3(%[p_coefs])                 \n\t"
    "lwl         %[tmp3],        -3(%[p_data_in])              \n\t"
    "lwl         %[tmp4],        7(%[p_coefs])                 \n\t"
    "lwr         %[tmp1],        -2(%[p_data_in])              \n\t"
    "lwr         %[tmp2],        0(%[p_coefs])                 \n\t"
    "lwr         %[tmp3],        -6(%[p_data_in])              \n\t"
    "lwr         %[tmp4],        4(%[p_coefs])                 \n\t"
    "packrl.ph   %[tmp1],        %[tmp1],         %[tmp1]      \n\t"
    "packrl.ph   %[tmp3],        %[tmp3],         %[tmp3]      \n\t"
    "dpa.w.ph    $ac0,           %[tmp1],         %[tmp2]      \n\t"
    "dpa.w.ph    $ac0,           %[tmp3],         %[tmp4]      \n\t"
    "addiu       %[j],           %[j],            -1           \n\t"
    "addiu       %[p_data_in],   %[p_data_in],    -8           \n\t"
    "bgtz        %[j],           2b                            \n\t"
    " addiu      %[p_coefs],     %[p_coefs],      8            \n\t"
   "3:                                                         \n\t"
    "beq         %[k],           $zero,           5f           \n\t"
    " nop                                                      \n\t"
    // Leftover taps, one at a time. lhu zero-extends, so the upper halfword
    // of each register is 0 and contributes nothing to dpa.w.ph.
   "4:                                                         \n\t"
    "lhu         %[tmp1],        0(%[p_data_in])               \n\t"
    "lhu         %[tmp2],        0(%[p_coefs])                 \n\t"
    "addiu       %[p_data_in],   %[p_data_in],    -2           \n\t"
    "addiu       %[k],           %[k],            -1           \n\t"
    "dpa.w.ph    $ac0,           %[tmp1],         %[tmp2]      \n\t"
    "bgtz        %[k],           4b                            \n\t"
    " addiu      %[p_coefs],     %[p_coefs],      2            \n\t"
   "5:                                                         \n\t"
    // Rounded right shift by 12, then saturate to 16 bits via a saturating
    // left shift followed by an arithmetic right shift.
    "extr_r.w    %[out_s32],     $ac0,            12           \n\t"
    "addu        %[p_data_in_0], %[p_data_in_0],  %[factor_2]  \n\t"
    "subu        %[i],           %[i],            %[factor]    \n\t"
    "shll_s.w    %[out_s32],     %[out_s32],      16           \n\t"
    "sra         %[out_s32],     %[out_s32],      16           \n\t"
    "sh          %[out_s32],     0(%[data_out])                \n\t"
    "bgtz        %[i],           1b                            \n\t"
    " addiu      %[data_out],    %[data_out],     2            \n\t"
    ".set        pop                                           \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
      [i] "=&r" (i), [k] "=&r" (k), [data_out] "+r" (data_out)
    : [coef_length] "r" (coefficients_length),
      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
      [delay] "r" (delay), [factor] "r" (factor)
    : "memory", "hi", "lo"
  );
#else  // #if defined(MIPS_DSP_R2_LE)
  __asm __volatile (
    ".set        push                                          \n\t"
    ".set        noreorder                                     \n\t"
    // Byte stride between consecutive output positions (int16_t samples).
    "sll         %[factor_2],    %[factor],       1            \n\t"
    "subu        %[i],           %[endpos],       %[delay]     \n\t"
   "1:                                                         \n\t"
    "move        %[p_data_in],   %[p_data_in_0]                \n\t"
    // Start the accumulator at 2048 (= 1 << 11) to round the >> 12 below.
    "addiu       %[out_s32],     $zero,           2048         \n\t"
    "move        %[p_coefs],     %[p_coefs_0]                  \n\t"
    // j = number of tap pairs, k = 1 if one tap is left over.
    "sra         %[j],           %[coef_length],  1            \n\t"
    "beq         %[j],           $zero,           3f           \n\t"
    " andi       %[k],           %[coef_length],  1            \n\t"
    // Two taps per iteration; samples backwards, coefficients forwards.
   "2:                                                         \n\t"
    "lh          %[tmp1],        0(%[p_data_in])               \n\t"
    "lh          %[tmp2],        0(%[p_coefs])                 \n\t"
    "lh          %[tmp3],        -2(%[p_data_in])              \n\t"
    "lh          %[tmp4],        2(%[p_coefs])                 \n\t"
    "mul         %[tmp1],        %[tmp1],         %[tmp2]      \n\t"
    "addiu       %[p_coefs],     %[p_coefs],      4            \n\t"
    "mul         %[tmp3],        %[tmp3],         %[tmp4]      \n\t"
    "addiu       %[j],           %[j],            -1           \n\t"
    "addiu       %[p_data_in],   %[p_data_in],    -4           \n\t"
    "addu        %[tmp1],        %[tmp1],         %[tmp3]      \n\t"
    "bgtz        %[j],           2b                            \n\t"
    " addu       %[out_s32],     %[out_s32],      %[tmp1]      \n\t"
   "3:                                                         \n\t"
    "beq         %[k],           $zero,           4f           \n\t"
    " nop                                                      \n\t"
    // Odd tap count: fold in the final tap.
    "lh          %[tmp1],        0(%[p_data_in])               \n\t"
    "lh          %[tmp2],        0(%[p_coefs])                 \n\t"
    "mul         %[tmp1],        %[tmp1],         %[tmp2]      \n\t"
    "addu        %[out_s32],     %[out_s32],      %[tmp1]      \n\t"
   "4:                                                         \n\t"
    "sra         %[out_s32],     %[out_s32],      12           \n\t"
    "addu        %[p_data_in_0], %[p_data_in_0],  %[factor_2]  \n\t"
    // Saturate the result to the int16_t range.
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w    %[out_s32],     %[out_s32],      16           \n\t"
    "sra         %[out_s32],     %[out_s32],      16           \n\t"
#else  // #if defined(MIPS_DSP_R1_LE)
    "slt         %[tmp1],        %[max_16],       %[out_s32]   \n\t"
    "movn        %[out_s32],     %[max_16],       %[tmp1]      \n\t"
    "slt         %[tmp1],        %[out_s32],      %[min_16]    \n\t"
    "movn        %[out_s32],     %[min_16],       %[tmp1]      \n\t"
#endif  // #if defined(MIPS_DSP_R1_LE)
    "subu        %[i],           %[i],            %[factor]    \n\t"
    "sh          %[out_s32],     0(%[data_out])                \n\t"
    "bgtz        %[i],           1b                            \n\t"
    " addiu      %[data_out],    %[data_out],     2            \n\t"
    ".set        pop                                           \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
      [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
      [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
      [i] "=&r" (i), [data_out] "+r" (data_out)
    : [coef_length] "r" (coefficients_length),
      [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
#if !defined(MIPS_DSP_R1_LE)
      [max_16] "r" (max_16), [min_16] "r" (min_16),
#endif  // #if !defined(MIPS_DSP_R1_LE)
      [delay] "r" (delay), [factor] "r" (factor)
    : "memory", "hi", "lo"
  );
#endif  // #if defined(MIPS_DSP_R2_LE)
  return 0;
}