@// Repository viewer header (not part of the program): blob 02f3888c56f00e3e2242f3535e81ea2f8a61ec8a [file] [log] [blame]
@// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@// Use of this source code is governed by a BSD-style license
@// that can be found in the LICENSE file in the root of the source
@// tree. An additional intellectual property rights grant can be found
@// in the file PATENTS. All contributing project authors may
@// be found in the AUTHORS file in the root of the source tree.
@// This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
@// to support float instead of SC32.
@// Description:
@// Compute the last stage of a Radix 2 DIT in-order out-of-place FFT
@// stage for a N point complex signal.
@// Include standard headers
#include "dl/api/armCOMM_s.h"
#include "dl/api/omxtypes_s.h"
@// Import symbols required from other files
@// (For example tables)
@// Set debugging level
@// Guarding implementation by the processor name
@// Register aliases used by the FFTSTAGE macro defined in this file.
@//Input Registers
@// pSrc, pTwiddle and pDst are advanced by the post-indexed NEON
@// loads/stores in FFTSTAGE and are rewritten again after its loop.
@// subFFTNum and subFFTSize are overwritten by FFTSTAGE (subFFTNum is
@// set to 1, subFFTSize is doubled).
#define pSrc r0
#define pDst r2
#define pTwiddle r1
#define subFFTNum r6
#define subFFTSize r7
@//Output Registers
@// (no output-only register aliases are defined)
@//Local Scratch Registers
@// grpCount and pTmp both alias r4. In FFTSTAGE grpCount is the loop
@// counter and pTmp is only written after the loop has finished, so the
@// two uses do not overlap.
#define outPointStep r3
#define grpCount r4
#define dstStep r5
#define pTmp r4
@// Neon Registers
@// Every alias is a 64-bit D register typed as f32 (two float lanes).
@//   dWr/dWi       - real/imaginary parts of two twiddle factors
@//   dXr0..dXi1    - de-interleaved real/imaginary parts of the inputs
@//   dYr0..dYi1    - real/imaginary parts of the butterfly outputs
@//   qT0/qT1       - real/imaginary parts of the twiddle product; these
@//                   are D registers (d10, d12) despite the q prefix.
@// NOTE(review): d8, d9, d10 and d12 are callee-saved under the AAPCS and
@// the M_START lines in this file name only r4 - presumably the caller of
@// these "unsafe" routines preserves them; confirm.
#define dWr d0.f32
#define dWi d1.f32
#define dXr0 d2.f32
#define dXi0 d3.f32
#define dXr1 d4.f32
#define dXi1 d5.f32
#define dYr0 d6.f32
#define dYi0 d7.f32
#define dYr1 d8.f32
#define dYi1 d9.f32
#define qT0 d10.f32
#define qT1 d12.f32
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the last radix-2 stage of the out-of-place complex-float FFT.
@//   scaled  - accepted but not referenced anywhere in this macro body.
@//   inverse - "TRUE" selects the conjugated twiddle multiply; any other
@//             value selects the plain twiddle multiply.
@//   name    - suffix appended to the loop label so that every expansion
@//             of the macro gets its own label.
@// On entry : pSrc, pDst, pTwiddle and subFFTSize hold the values named
@//            by the register aliases.
@// On exit  : subFFTNum = 1, subFFTSize is doubled, pSrc/pDst are reset
@//            and swapped, pTwiddle is rewound by outPointStep bytes.
@// Clobbers : outPointStep, grpCount/pTmp, dstStep, d0-d10, d12, flags.
.MACRO FFTSTAGE scaled, inverse, name

        @// Byte offset between the two output halves: subFFTSize complex
        @// floats of 8 bytes each.
        MOV     outPointStep,subFFTSize,LSL #3

        @// Update grpCount and grpSize right away
        MOV     subFFTNum,#1                    @// after the last stage
        LSL     grpCount,subFFTSize,#1

        @// Update subFFTSize for the next stage
        MOV     subFFTSize,grpCount

        @// Step applied after the second store: back by outPointStep and
        @// forward by the 16 bytes just written to the first half.
        RSB     dstStep,outPointStep,#16

        @// Loop on 2 grps at a time for the last stage
radix2lsGrpLoop\name :
        @// dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
        @// dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
        VLD2    {dWr,dWi},[pTwiddle :64]!

        @// dXr0 = [pSrc[0].Re, pSrc[2].Re]
        @// dXi0 = [pSrc[0].Im, pSrc[2].Im]
        @// dXr1 = [pSrc[1].Re, pSrc[3].Re]
        @// dXi1 = [pSrc[1].Im, pSrc[3].Im]
        VLD4    {dXr0,dXi0,dXr1,dXi1},[pSrc :128]!

        @// The flags set here are consumed by the BGT at the bottom of the
        @// loop; no instruction in between updates them.
        SUBS    grpCount,grpCount,#4            @// grpCount is multiplied by 2

        @// qT0 + j*qT1 = twiddle * X1, with the twiddle conjugated for the
        @// inverse transform. Exactly one of the two variants is assembled.
        .ifeqs  "\inverse", "TRUE"
        VMUL    qT0,dWr,dXr1
        VMLA    qT0,dWi,dXi1                    @// real part
        VMUL    qT1,dWr,dXi1
        VMLS    qT1,dWi,dXr1                    @// imag part
        .else
        VMUL    qT0,dWr,dXr1
        VMLS    qT0,dWi,dXi1                    @// real part
        VMUL    qT1,dWr,dXi1
        VMLA    qT1,dWi,dXr1                    @// imag part
        .endif

        @// Butterfly: Y0 = X0 - T, Y1 = X0 + T
        VSUB    dYr0,dXr0,qT0
        VSUB    dYi0,dXi0,qT1
        VADD    dYr1,dXr0,qT0
        VADD    dYi1,dXi0,qT1

        @// Y0 goes to pDst, Y1 goes outPointStep bytes further on
        VST2    {dYr0,dYi0},[pDst],outPointStep
        VST2    {dYr1,dYi1},[pDst],dstStep      @// dstStep = step = -outPointStep + 16

        BGT     radix2lsGrpLoop\name

        @// Reset and Swap pSrc and pDst for the next stage
        @// (pTmp reuses r4, which is free now that the loop is done)
        MOV     pTmp,pDst
        SUB     pDst,pSrc,outPointStep,LSL #1   @// pDst -= 4*size; pSrc -= 8*size bytes
        SUB     pSrc,pTmp,outPointStep

        @// Reset pTwiddle for the next stage
        SUB     pTwiddle,pTwiddle,outPointStep  @// pTwiddle -= 4*size bytes

.endm
@// Entry point for the forward last-stage radix-2 routine.
@// M_START comes from the included dl/api/armCOMM_s.h; presumably the r4
@// argument names the highest core register its prologue saves and the
@// empty third argument means no NEON registers are saved - confirm
@// against that header.
@// NOTE(review): no FFTSTAGE expansion and no closing macro for this
@// function are visible between this M_START and the next one; verify
@// that the function body has not been lost.
M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4,""
@// Entry point for the inverse last-stage radix-2 routine.
@// NOTE(review): unlike the forward entry, no third argument is passed to
@// M_START here, and no body or closing macro is visible after this line
@// in the portion of the file under review - confirm both.
M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace_unsafe,r4