@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
|  |  | 
@ vector_scaling_operations_neon.s
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRoundNeon(),
@ optimized for the ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.
|  |  | 
|  | #include "webrtc/system_wrappers/interface/asm_defines.h" | 
|  |  | 
@ -----------------------------------------------------------------------------
@ WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
@
@ For each i in [0, length) stores the low 16 bits of
@   (in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale
@    + round_value) >> right_shifts
@ into out_vector[i], where round_value = (1 << right_shifts) >> 1.
@
@ Inputs (stack offsets are relative to sp on entry):
@   r0        in_vector1        16-bit elements
@   r1        in_vector1_scale  low 16 bits used, as a signed value
@   r2        in_vector2        16-bit elements
@   r3        in_vector2_scale  low 16 bits used, as a signed value
@   [sp, #0]  right_shifts      read as a signed halfword
@   [sp, #4]  out_vector        16-bit elements
@   [sp, #8]  length            element count; nothing is done when <= 0
@
@ Register use:
@   r4 = length, then the scalar-tail counter   r5 = out_vector cursor
@   r6 = right_shifts                           r7, r8 = scratch
@   r9 = round_value (scalar tail only)
@   r4-r9 are saved and restored. r0 and r2 are advanced past the consumed
@   input and r0 is not set to a return value. q0-q3, q12-q15 and the
@   condition flags are overwritten.
@
@ Structure: blocks of 8 elements go through the Neon loop; the remaining
@ length % 8 elements (or everything when length < 8) go through the
@ scalar loop.
@ -----------------------------------------------------------------------------
GLOBAL_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
.align  2
DEFINE_FUNCTION WebRtcSpl_ScaleAndAddVectorsWithRoundNeon
  push {r4-r9}                @ 24 bytes: stack arguments now start at sp + 24.

  ldr r4, [sp, #32]           @ length
  ldr r5, [sp, #28]           @ out_vector
  ldrsh r6, [sp, #24]         @ right_shifts

  cmp r4, #0
  ble END                     @ Return if length <= 0.

  cmp r4, #8
  blt SET_ROUND_VALUE         @ Fewer than 8 elements: scalar loop only.

  vdup.16 d26, r1             @ in_vector1_scale
  vdup.16 d27, r3             @ in_vector2_scale

  @ Neon instructions can only shift right by an immediate value. To shift
  @ right by a register value, we do a left shift by the negative value.
  @ The shift vector is duplicated per 32-bit lane to match vrshl.s32 below.
  rsb r7, r6, #0
  vdup.32 q12, r7             @ -right_shifts

  bic r7, r4, #7              @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!   @ in_vector1[], 8 elements
  vld1.16 {d30, d31}, [r2]!   @ in_vector2[], 8 elements
  vmull.s16 q0, d28, d26      @ in_vector1[0..3] * in_vector1_scale
  vmull.s16 q1, d29, d26      @ in_vector1[4..7] * in_vector1_scale
  vmull.s16 q2, d30, d27      @ in_vector2[0..3] * in_vector2_scale
  vmull.s16 q3, d31, d27      @ in_vector2[4..7] * in_vector2_scale
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12           @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0            @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!     @ Does not touch the flags set by subs.
  bgt LOOP_UNROLLED_BY_8

  @ Only the elements not covered by the blocks of 8 are left. The mask must
  @ be 7: a wider mask would make the scalar loop run past the end of the
  @ vectors for any length >= 8 that is not a multiple of the mask size.
  ands r4, #7                 @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq END

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                  @ round_value = (1 << right_shifts) >> 1

LOOP_NO_UNROLLING:
  ldrh  r7, [r0], #2          @ in_vector1[i]
  ldrh  r8, [r2], #2          @ in_vector2[i]
  smulbb r7, r7, r1           @ Signed multiply of the bottom halfwords, so
  smulbb r8, r8, r3           @ the zero-extending loads above are fine.
  subs r4, #1                 @ Flags are consumed by bne below; none of the
                              @ instructions in between has the s suffix.
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2           @ Store the low 16 bits to out_vector[i].
  bne LOOP_NO_UNROLLING

END:
  pop {r4-r9}
  bx  lr