blob: bc18d445687e17dd9a2bb1ccdada56cb8dc46ee1 [file] [log] [blame]
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARMv7 platforms.
@
@ Output is bit-exact with the reference C code in pitch_filter.c.
#include "webrtc/system_wrappers/include/asm_defines.h"
#include "settings.h"
GLOBAL_FUNCTION WebRtcIsacfix_PitchFilterCore
.align 2

@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
@                                    int16_t gain,
@                                    size_t index,
@                                    int16_t sign,
@                                    int16_t* inputState,
@                                    int16_t* outputBuf2,
@                                    const int16_t* coefficient,
@                                    int16_t* inputBuf,
@                                    int16_t* outputBuf,
@                                    int* index2) {
@
@ Argument locations on entry:
@   r0: loopNumber, r1: gain, r2: index, r3: sign
@   stack, in order: inputState, outputBuf2, coefficient, inputBuf,
@                    outputBuf, index2
@
@ The prologue pushes r4-r11 (32 bytes) and reserves 8 more bytes, so inside
@ the function the stacked arguments are found at:
@   [sp, #40] inputState    [sp, #44] outputBuf2   [sp, #48] coefficient
@   [sp, #52] inputBuf      [sp, #56] outputBuf    [sp, #60] index2
@ and the two local slots hold:
@   [sp]      loopNumber (stored, not read back in this routine)
@   [sp, #4]  sign
@
@ For each of the loopNumber samples the routine:
@   1. runs a 9-tap filter over outputBuf2 with coefficient[0..8],
@   2. scales the result by gain and pushes it into the 5-entry inputState,
@   3. runs the 5-tap kDampFilter over inputState,
@   4. writes sat16(inputBuf[i] - sign * filtered) to outputBuf[i], and
@      sat16(outputBuf[i] + inputBuf[i]) to outputBuf2[i + PITCH_BUFFSIZE],
@ where i starts at *index2. *index2 is advanced by loopNumber.
@
@ Nothing is returned in r0. r4-r11 are preserved; r0-r3, r12 and the flags
@ are clobbered.
DEFINE_FUNCTION WebRtcIsacfix_PitchFilterCore
  @ The loop below is bottom-tested, so without this check a loopNumber of
  @ zero or less would still run one full iteration (writing one sample and
  @ shifting inputState), and a negative value would also move *index2
  @ backwards. Leave before touching any register or memory in that case.
  cmp r0, #0
  ble LOOP_DONE

  push {r4-r11}
  sub sp, #8

  str r0, [sp]                @ loopNumber
  str r3, [sp, #4]            @ sign (r3 is reused as a pointer below)
  ldr r3, [sp, #44]           @ outputBuf2
  ldr r6, [sp, #60]           @ index2
  ldr r7, [r6]                @ *index2
  ldr r8, [sp, #52]           @ inputBuf
  ldr r12, [sp, #56]          @ outputBuf

  add r4, r7, r0
  str r4, [r6]                @ *index2 += loopNumber, written back up front.

  mov r10, r7, asl #1         @ Byte offset of element *index2 (int16_t).
  add r12, r10                @ &outputBuf[*index2]
  add r8, r10                 @ &inputBuf[*index2]

  add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
  add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]

  sub r4, r2                  @ r2: index
  sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
  add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
  ldr r9, [sp, #48]           @ coefficient

LOOP:
@ Usage of registers in the loop:
@  r0: loop counter
@  r1: gain
@  r2: tmpW32
@  r3: &ubufQQpos2[]
@  r6: &outputBuf2[]
@  r8: &inputBuf[]
@  r9: &coefficient[]
@  r12: &outputBuf[]
@  r4, r5, r7, r10, r11: scratch

  @ Filter to get fractional pitch.
  @ The pitch filter loop here is unrolled with 9 multiplications:
  @ four dual 16x16 multiply-accumulates on word pairs plus one single tap.
  pld [r3]
  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
  ldr r4, [r9], #4            @ coefficient[0, 1]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 2, *index2 + 3]
  ldr r5, [r9], #4            @ coefficient[2, 3]
  smuad r2, r10, r4
  smlad r2, r11, r5, r2

  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 4, *index2 + 5]
  ldr r4, [r9], #4            @ coefficient[4, 5]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 6, *index2 + 7]
  ldr r5, [r9], #4            @ coefficient[6, 7]
  smlad r2, r10, r4, r2
  @ Ninth tap. The post-index of -14 undoes the four 4-byte steps except for
  @ 2 bytes, so r3 ends up one sample further on, ready for the next pass.
  ldrh r10, [r3], #-14        @ ubufQQpos2[*index2 + 8]
  ldrh r4, [r9], #-16         @ coefficient[8]; r9 back to &coefficient[0].
  smlad r2, r11, r5, r2
  smlabb r2, r10, r4, r2      @ Only the bottom halfwords are multiplied.

  @ Saturate to avoid overflow in tmpW16.
  @ Halve first, then round and shift by 13 with saturation to 16 bits.
  asr r2, #1
  add r4, r2, #0x1000
  ssat r7, #16, r4, asr #13

  @ Shift low pass filter state, and execute the low pass filter.
  @ The memmove() and the low pass filter loop are unrolled and mixed.
  smulbb r5, r1, r7           @ gain * tmpW16
  add r7, r5, #0x800          @ Round before the shift by 12.
  asr r7, #12                 @ Get the value for inputState[0].
  ldr r11, [sp, #40]          @ inputState
  pld [r11]
  adr r10, kDampFilter
  ldrsh r4, [r10], #2         @ kDampFilter[0]
  mul r2, r7, r4              @ New inputState[0] * kDampFilter[0]
  ldr r4, [r11]               @ inputState[0, 1], before shift.
  strh r7, [r11]              @ inputState[0], after shift.
  ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
  ldr r7, [r10], #4           @ kDampFilter[1, 2]
  ldr r10, [r10]              @ kDampFilter[3, 4]
  str r4, [r11, #2]           @ inputState[1, 2], after shift.
  str r5, [r11, #6]           @ inputState[3, 4], after shift.
  smlad r2, r4, r7, r2        @ Old inputState[0, 1] are the new [1, 2].
  smlad r2, r5, r10, r2       @ Old inputState[2, 3] are the new [3, 4].

  @ Saturate to avoid overflow.
  @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
  @ to avoid overflow in the next saturation step.
  asr r2, #1
  add r10, r2, #0x2000
  ssat r10, #16, r10, asr #14

  @ Subtract from input and update buffer.
  ldr r11, [sp, #4]           @ sign
  ldrsh r4, [r8]              @ inputBuf[*index2], consumed by the subtract.
  ldrsh r7, [r8], #2          @ inputBuf[*index2] again, kept for the sum.
  smulbb r5, r11, r10         @ sign * tmpW16
  subs r0, #1                 @ Flags are consumed by the bgt below; nothing
                              @ in between updates N, Z, C or V.
  sub r4, r5
  ssat r2, #16, r4
  strh r2, [r12], #2          @ outputBuf[*index2]

  add r2, r7                  @ Saturated output + input sample.
  ssat r2, #16, r2
  strh r2, [r6], #2           @ outputBuf2[*index2 + PITCH_BUFFSIZE]

  bgt LOOP

  add sp, #8
  pop {r4-r11}
LOOP_DONE:
  bx lr
@ Five signed 16-bit taps of the low pass filter that the loop applies to
@ inputState[0..4]. The taps are symmetric and sum to 32768, which matches
@ the rounded shift by 15 bits in total (asr 1, then 0x2000 and asr 14) that
@ the loop applies to the accumulated result.
@ The table is read with pc-relative addressing (adr), so it has to stay
@ close to the code. The loop fetches tap 0 as a halfword and taps [1, 2]
@ and [3, 4] as words at byte offsets 2 and 6, i.e. with word loads that
@ are not 4-byte aligned.
.align 2
kDampFilter:
.short -2294, 8192, 20972, 8192, -2294