Revert 4347 "Implementation of real value 16 bit FFT with 16 bit..."
> Implementation of real value 16 bit FFT with 16 bit complex FFT routines, for ARM Neon platforms.
> Verified with SNR testing code in Openmax folder.
>
> R=aedla@chromium.org, rtoy@google.com
>
> Review URL: https://webrtc-codereview.appspot.com/1323010
TBR=kma@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/1820004
git-svn-id: http://webrtc.googlecode.com/svn/deps/third_party/openmax@4361 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/dl/dl.gyp b/dl/dl.gyp
index 1f014b0..0573ce2 100644
--- a/dl/dl.gyp
+++ b/dl/dl.gyp
@@ -54,6 +54,8 @@
'sp/src/omxSP_FFTInit_R_S32.c',
'sp/src/omxSP_FFTInv_CCSToR_S32_Sfs_s.S',
# Complex 16-bit fixed-point FFT
+ 'sp/src/omxSP_FFTInit_C_SC16.c',
+ 'sp/src/omxSP_FFTGetBufSize_C_SC16.c',
'sp/src/armSP_FFT_CToC_SC16_Radix2_fs_unsafe_s.S',
'sp/src/armSP_FFT_CToC_SC16_Radix2_ls_unsafe_s.S',
'sp/src/armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S',
@@ -63,18 +65,11 @@
'sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S',
'sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S',
'sp/src/omxSP_FFTFwd_CToC_SC16_Sfs_s.S',
- 'sp/src/omxSP_FFTGetBufSize_C_SC16.c',
- 'sp/src/omxSP_FFTInit_C_SC16.c',
'sp/src/omxSP_FFTInv_CToC_SC16_Sfs_s.S',
# Real 16-bit fixed-point FFT
- 'sp/src/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S',
'sp/src/omxSP_FFTFwd_RToCCS_S16S32_Sfs_s.S',
- 'sp/src/omxSP_FFTGetBufSize_R_S16.c',
'sp/src/omxSP_FFTGetBufSize_R_S16S32.c',
- 'sp/src/omxSP_FFTInit_R_S16.c',
'sp/src/omxSP_FFTInit_R_S16S32.c',
- 'sp/src/omxSP_FFTInv_CCSToR_S16_Sfs_s.S',
- 'sp/src/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S',
'sp/src/omxSP_FFTInv_CCSToR_S32S16_Sfs_s.S',
# Complex floating-point FFT
'sp/src/armSP_FFT_CToC_FC32_Radix2_fs_unsafe_s.S',
diff --git a/dl/sp/api/armSP.h b/dl/sp/api/armSP.h
index 4972f09..f615a87 100644
--- a/dl/sp/api/armSP.h
+++ b/dl/sp/api/armSP.h
@@ -64,14 +64,6 @@
OMX_S32 *pBuf;
}ARMsFFTSpec_R_SC32;
-typedef struct ARMsFFTSpec_R_SC16_Tag
-{
- OMX_U32 N;
- OMX_U16 *pBitRev;
- OMX_SC16 *pTwiddle;
- OMX_S16 *pBuf;
-} ARMsFFTSpec_R_SC16;
-
typedef struct ARMsFFTSpec_R_FC32_Tag
{
OMX_U32 N;
diff --git a/dl/sp/api/omxSP.h b/dl/sp/api/omxSP.h
index 13c64e3..695fa90 100644
--- a/dl/sp/api/omxSP.h
+++ b/dl/sp/api/omxSP.h
@@ -44,7 +44,6 @@
typedef void OMXFFTSpec_C_SC16;
typedef void OMXFFTSpec_C_SC32;
typedef void OMXFFTSpec_R_S16S32;
- typedef void OMXFFTSpec_R_S16;
typedef void OMXFFTSpec_R_S32;
typedef void OMXFFTSpec_R_F32;
typedef void OMXFFTSpec_C_FC32;
diff --git a/dl/sp/src/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S b/dl/sp/src/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S
deleted file mode 100644
index 7e33484..0000000
--- a/dl/sp/src/armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe_s.S
+++ /dev/null
@@ -1,413 +0,0 @@
-@
-@ Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
-@
-@ Use of this source code is governed by a BSD-style license
-@ that can be found in the LICENSE file in the root of the source
-@ tree. An additional intellectual property rights grant can be found
-@ in the file PATENTS. All contributing project authors may
-@ be found in the AUTHORS file in the root of the source tree.
-@
-@ Some code in this file was originally from file
-@ armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.S which was licensed as
-@ follows. It has been relicensed with permission from the copyright holders.
-@
-
-@
-@ OpenMAX DL: v1.0.2
-@ Last Modified Revision: 7485
-@ Last Modified Date: Fri, 21 Sep 2007
-@
-@ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
-@
-
-@
-@ Description:
-@ Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT.
-@ It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation.
-@ It implements both "scaled"(by 1/2) and "unscaled" versions of the above
-@ formula.
-@
-
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
-
-@//Input Registers
-#define pSrc r0
-#define pDst r1
-#define pFFTSpec r2
-#define scale r3
-
-@ Output registers
-#define result r0
-
-@//Local Scratch Registers
-#define argTwiddle r1
-#define argDst r2
-#define argScale r4
-#define tmpOrder r4
-#define pTwiddle r4
-#define pOut r5
-#define subFFTSize r7
-#define subFFTNum r6
-#define N r6
-#define order r14
-#define diff r9
-@ Total num of radix stages to comple the FFT.
-#define count r8
-#define x0r r4
-#define x0i r5
-#define diffMinusOne r2
-#define round r3
-#define pOut1 r2
-#define size r7
-#define step r8
-#define step1 r9
-#define step2 r10
-#define twStep r10
-#define pTwiddleTmp r11
-#define argTwiddle1 r12
-#define zero r14
-
-@ Neon registers
-#define dX0 D0.S16
-#define dX0S32 D0.S32
-#define dShift D1.S16
-#define dX1 D1.S16
-#define dX1S32 D1.S32
-#define dY0 D2.S16
-#define dY1 D3.S16
-#define dX0r D0.S16
-#define dX0rS32 D0.S32
-#define dX0i D1.S16
-#define dX1r D2.S16
-#define dX1i D3.S16
-#define qX1 Q1.S16
-#define dW0r D4.S16
-#define dW0i D5.S16
-#define dW1r D6.S16
-#define dW1i D7.S16
-#define dW0rS32 D4.S32
-#define dW0iS32 D5.S32
-#define dW1rS32 D6.S32
-#define dW1iS32 D7.S32
-#define dT0 D8.S16
-#define dT1 D9.S16
-#define dT2 D10.S16
-#define dT3 D11.S16
-#define qT0 Q6.S32
-#define qT1 Q7.S32
-#define qT2 Q8.S32
-#define qT3 Q9.S32
-#define dY0r D4.S16
-#define dY0i D5.S16
-#define dY1r D6.S16
-#define dY1i D7.S16
-#define qY1 Q3.S16
-#define dY2 D4.S16
-#define dY3 D5.S16
-#define dW0 D6.S16
-#define dW1 D7.S16
-#define dW0Tmp D10.S16
-#define dW1Neg D11.S16
-
- @ Structure offsets for the FFTSpec
- .set ARMsFFTSpec_N, 0
- .set ARMsFFTSpec_pBitRev, 4
- .set ARMsFFTSpec_pTwiddle, 8
- .set ARMsFFTSpec_pBuf, 12
-
- .MACRO FFTSTAGE scaled, inverse, name
-
- @ Read the size from structure and take log
- LDR N, [pFFTSpec, #ARMsFFTSpec_N]
-
- @ Read other structure parameters
- LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
- LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
-
- MOV size,N,ASR #1 @ preserve the contents of N
- MOV step,N,LSL #1 @ step = N/2 * 4 bytes
-
- @ Process different FFT sizes with different loops.
- CMP size,#4
- BLE smallFFTSize\name
-
- @ Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
- @ Note: W^(k) is stored as negated value and also need to
- @ conjugate the values from the table.
-
- @ Z(0) : no need of twiddle multiply
- @ Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
-
- VLD1 dX0S32[0],[pSrc],step
- ADD pOut1,pOut,step @ pOut1 = pOut+ N/2*4 bytes
-
- VLD1 dX1S32[0],[pSrc]!
- SUB twStep,step,size @ twStep = 3N/8 * 4 bytes pointing to W^1
-
- MOV step1,size,LSL #1 @ step1 = N/4 * 4 = N/2*2 bytes
- SUB step1,step1,#4 @ (N/4-1)*4 bytes
-
- VHADD dY0,dX0,dX1 @ [b+d | a+c]
- VHSUB dY1,dX0,dX1 @ [b-d | a-c]
- VTRN dY0,dY1 @ dY0= [a-c | a+c] ;dY1= [b-d | b+d]
-
- .ifeqs "\scaled", "TRUE"
- VHSUB dX0,dY0,dY1
- SUBS size,size,#2
- VHADD dX1,dY0,dY1
- .else
- VSUB dX0,dY0,dY1
- SUBS size,size,#2
- VADD dX1,dY0,dY1
- .endif
-
- SUB pSrc,pSrc,step
- VST1 dX0[0],[pOut1]!
- ADD pTwiddleTmp,pTwiddle,#4 @ W^2
- VST1 dX1[1],[pOut1]!
- ADD argTwiddle1,pTwiddle,twStep @ W^1
-
- BLT decrementScale\name
- BEQ lastElement\name
-
- SUB step,step,#20
- SUB step1,step1,#4 @ (N/4-1)*8 bytes
- SUB step2, step1, #4
-
- @ Z(k) = 1/2[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]
- @ Note: W^k is stored as negative values in the table and also need to
- @ conjugate the values from the table.
- @ Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
- @ since both of them require F(1),F(2) and F(N/2-2),F(N/2-1).
-
-evenOddButterflyLoop\name:
- VLD2 {dX0r,dX0i},[pSrc],step
- VLD2 {dX1r,dX1i},[pSrc]!
- SUB pSrc, pSrc, step
-
- VLD1 dW0r,[argTwiddle1],step1
- VREV64 qX1,qX1
- VLD1 dW1r,[argTwiddle1]!
- VHSUB dT2,dX0r,dX1r @ a-c
- SUB argTwiddle1, argTwiddle1, step1
- SUB step1,step1,#16
-
- VLD1 dW0i,[pTwiddleTmp],step2
- VHADD dT3,dX0i,dX1i @ b+d
- VLD1 dW1i,[pTwiddleTmp]!
- VHADD dT0,dX0r,dX1r @ a+c
- VHSUB dT1,dX0i,dX1i @ b-d
- SUB pTwiddleTmp, pTwiddleTmp, step2
- SUB step2,step2,#16
-
- SUBS size,size,#8
-
- VZIP dW1r,dW1i
- VTRN dW0r,dW0i
- VZIP dW1iS32, dW1rS32
-
- VMULL qT0,dW1i,dT2
- VMLSL qT0,dW1r,dT3
- VMULL qT1,dW1i,dT3
- VMLAL qT1,dW1r,dT2
- VMULL qT2,dW0r,dT2
- VMLAL qT2,dW0i,dT3
- VMULL qT3,dW0r,dT3
- VMLSL qT3,dW0i,dT2
-
- VRSHRN dX1r,qT0,#15
- VRSHRN dX1i,qT1,#15
- VRSHRN dX0r,qT2,#15
- VRSHRN dX0i,qT3,#15
-
- .ifeqs "\scaled", "TRUE"
- VHADD dY1r,dT0,dX1i @ F(N/2 -1)
- VHSUB dY1i,dX1r,dT1
- .else
- VADD dY1r,dT0,dX1i @ F(N/2 -1)
- VSUB dY1i,dX1r,dT1
- .endif
-
- .ifeqs "\scaled", "TRUE"
- VHADD dY0r,dT0,dX0i @ F(1)
- VHSUB dY0i,dT1,dX0r
- .else
- VADD dY0r,dT0,dX0i @ F(1)
- VSUB dY0i,dT1,dX0r
- .endif
-
- VREV64 qY1,qY1
-
- VST2 {dY0r,dY0i},[pOut1],step
- VST2 {dY1r,dY1i},[pOut1]
- ADD pOut1,pOut1,#16
- SUB pOut1, pOut1, step
- SUB step,step,#32
-
- BGT evenOddButterflyLoop\name
-
- SUB pSrc,pSrc,#4 @ set both the ptrs to the last element
- SUB pOut1,pOut1,#4
- B lastElement\name
-
-smallFFTSize\name:
- @ Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
- @ Note: W^(k) is stored as negated value and also need to
- @ conjugate the values from the table.
-
- @ Z(0) : no need of twiddle multiply
- @ Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
-
- VLD1 dX0S32[0],[pSrc],step
- ADD pOut1,pOut,step @ pOut1 = pOut+ N/2*4 bytes
-
- VLD1 dX1S32[0],[pSrc]!
- SUB twStep,step,size @ twStep = 3N/8 * 4 bytes pointing to W^1
-
- MOV step1,size,LSL #1 @ step1 = N/4 * 4 = N/2*2 bytes
- SUB step1,step1,#4 @ (N/4-1)*4 bytes
-
- VHADD dY0,dX0,dX1 @ [b+d | a+c]
- VHSUB dY1,dX0,dX1 @ [b-d | a-c]
- VTRN dY0,dY1 @ dY0= [a-c | a+c] ;dY1= [b-d | b+d]
-
- .ifeqs "\scaled", "TRUE"
- VHSUB dX0,dY0,dY1
- SUBS size,size,#2
- VHADD dX1,dY0,dY1
- .else
- VSUB dX0,dY0,dY1
- SUBS size,size,#2
- VADD dX1,dY0,dY1
- .endif
-
- SUB pSrc,pSrc,step
- VST1 dX0[0],[pOut1]!
- ADD pTwiddleTmp,pTwiddle,#4 @ W^2
- VST1 dX1[1],[pOut1]!
- ADD argTwiddle1,pTwiddle,twStep @ W^1
-
- BLT decrementScale\name
- BEQ lastElement\name
-
- @ Z(k) = 1/2[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]
- @ Note: W^k is stored as negative values in the table and also need to
- @ conjugate the values from the table.
- @ Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
- @ since both of them require F(1),F(2) and F(N/2-2),F(N/2-1).
-
- SUB step,step,#12
-
-evenOddButterflyLoopSize4\name:
- VLD1 dW0rS32[0],[argTwiddle1],step1
- VLD1 dW1rS32[0],[argTwiddle1]!
-
- VLD2 {dX0r[0],dX0i[0]},[pSrc]!
- VLD2 {dX0r[1],dX0i[1]},[pSrc],step
- SUB pSrc,pSrc,#4
- SUB argTwiddle1,argTwiddle1,step1
- VLD2 {dX1r[0],dX1i[0]},[pSrc]!
- VLD2 {dX1r[1],dX1i[1]},[pSrc]!
-
- SUB step1,step1,#4 @ (N/4-2)*4 bytes
- VLD1 dW0iS32[0],[pTwiddleTmp],step1
- VLD1 dW1iS32[0],[pTwiddleTmp]!
- SUB pSrc,pSrc,step
-
- SUB pTwiddleTmp,pTwiddleTmp,step1
- VREV32 dX1r,dX1r
- VREV32 dX1i,dX1i
- SUBS size,size,#4
-
- VHSUB dT2,dX0r,dX1r @ a-c
- VHADD dT3,dX0i,dX1i @ b+d
- SUB step1,step1,#4
- VHADD dT0,dX0r,dX1r @ a+c
- VHSUB dT1,dX0i,dX1i @ b-d
-
- VTRN dW1r,dW1i
- VTRN dW0r,dW0i
-
- VMULL qT0,dW1r,dT2
- VMLSL qT0,dW1i,dT3
- VMULL qT1,dW1r,dT3
- VMLAL qT1,dW1i,dT2
- VMULL qT2,dW0r,dT2
- VMLAL qT2,dW0i,dT3
- VMULL qT3,dW0r,dT3
- VMLSL qT3,dW0i,dT2
-
- VRSHRN dX1r,qT0,#15
- VRSHRN dX1i,qT1,#15
-
- .ifeqs "\scaled", "TRUE"
- VHADD dY1r,dT0,dX1i @ F(N/2 -1)
- VHSUB dY1i,dX1r,dT1
- .else
- VADD dY1r,dT0,dX1i @ F(N/2 -1)
- VSUB dY1i,dX1r,dT1
- .endif
-
- VREV32 dY1r,dY1r
- VREV32 dY1i,dY1i
-
- VRSHRN dX0r,qT2,#15
- VRSHRN dX0i,qT3,#15
-
- .ifeqs "\scaled", "TRUE"
- VHADD dY0r,dT0,dX0i @ F(1)
- VHSUB dY0i,dT1,dX0r
- .else
- VADD dY0r,dT0,dX0i @ F(1)
- VSUB dY0i,dT1,dX0r
- .endif
-
- VST2 {dY0r[0],dY0i[0]},[pOut1]!
- VST2 {dY0r[1],dY0i[1]},[pOut1],step
- SUB pOut1, #4
- VST2 {dY1r[0],dY1i[0]},[pOut1]!
- VST2 {dY1r[1],dY1i[1]},[pOut1]!
- SUB pOut1,pOut1,step
- SUB step,step,#16 @ (N/2-4)*8 bytes
-
- BGT evenOddButterflyLoopSize4\name
-
- SUB pSrc,pSrc,#4 @ set both the ptrs to the last element
- SUB pOut1,pOut1,#4
-
- @ Last element can be expanded as follows
- @ 1/2[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)] (W^k is stored as -ve)
- @ 1/2[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]
- @ 1/2[2a+j0] - j (c-jd) [0+j2b]
- @ (a+bc, -bd)
- @ Since (c,d) = (0,1) for the last element, result is just (a,-b)
-
-lastElement\name:
- VLD1 dX0rS32[0],[pSrc]
-
- .ifeqs "\scaled", "TRUE"
- VSHR dX0r,dX0r,#1
- .endif
-
- VST1 dX0r[0],[pOut1]!
- VNEG dX0r,dX0r
- VST1 dX0r[1],[pOut1]
-
-decrementScale\name:
- .ifeqs "\scaled", "TRUE"
- SUB scale,scale,#1
- .endif
-
- .endm
-
- M_START armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe,r4
- FFTSTAGE "FALSE","TRUE",Inv
- M_END
-
- M_START armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe,r4
- FFTSTAGE "TRUE","TRUE",InvSfs
- M_END
-
-
- .end
diff --git a/dl/sp/src/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S b/dl/sp/src/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S
deleted file mode 100644
index 50d8833..0000000
--- a/dl/sp/src/omxSP_FFTFwd_RToCCS_S16_Sfs_s.S
+++ /dev/null
@@ -1,660 +0,0 @@
-@
-@ Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
-@
-@ Use of this source code is governed by a BSD-style license
-@ that can be found in the LICENSE file in the root of the source
-@ tree. An additional intellectual property rights grant can be found
-@ in the file PATENTS. All contributing project authors may
-@ be found in the AUTHORS file in the root of the source tree.
-@
-@ Some code in this file was originally from file
-@ omxSP_FFTFwd_RToCCS_S32_Sfs_s.S which was licensed as follows.
-@ It has been relicensed with permission from the copyright holders.
-@
-
-@
-@ OpenMAX DL: v1.0.2
-@ Last Modified Revision: 7810
-@ Last Modified Date: Thu, 04 Oct 2007
-@
-@ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
-@
-
-@
-@ Description:
-@ Compute a forward FFT for a real signal, using 16 bit complex FFT routines.
-@
-
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
-
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
-.extern armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
-
-@Input Registers
-#define pSrc r0
-#define pDst r1
-#define pFFTSpec r2
-#define scale r3
-
-@ Output registers
-#define result r0
-
-@Local Scratch Registers
-#define argTwiddle r1
-#define argDst r2
-#define argScale r4
-#define pTwiddle r4
-#define tmpOrder r4
-#define pOut r5
-#define subFFTSize r7
-#define subFFTNum r6
-#define N r6
-#define order r14
-#define diff r9
-@ Total num of radix stages to comple the FFT
-#define count r8
-#define x0r r4
-#define x0i r5
-#define diffMinusOne r2
-#define round r3
-#define subFFTSizeTmp r6
-#define step r3
-#define stepr r11
-#define step1 r10
-#define step1r r6
-#define step2 r8
-#define step2r r9
-#define twStep r8
-#define zero r9
-#define pTwiddleTmp r5
-#define t0 r10
-
-@ Neon registers
-#define dX0 d0.s16
-#define dX0S32 d0.s32
-#define dzero d1.s16
-#define dZero d2.s16
-#define dShift d3.s16
-#define qShift q1.s16
-#define dX0r d2.s16
-#define dX0i d3.s16
-#define dX1r d4.s16
-#define dX1i d5.s16
-#define qX1 q2.s16
-#define dX0rS32 d2.s32
-#define dX0iS32 d3.s32
-#define dX1rS32 d4.s32
-#define dX1iS32 d5.s32
-#define dT0 d6.s16
-#define dT1 d7.s16
-#define dT2 d8.s16
-#define dT3 d9.s16
-#define qT0 q5.s32
-#define qT1 q6.s32
-#define qT0s q5.s16
-#define qT1s q6.s16
-#define dW0r d14.s16
-#define dW0i d15.s16
-#define dW1r d16.s16
-#define dW1i d17.s16
-#define dW0rS32 d14.s32
-#define dW0iS32 d15.s32
-#define dW1rS32 d16.s32
-#define dW1iS32 d17.s32
-#define dY0r d14.s16
-#define dY0i d15.s16
-#define dY0rS32 d14.s32
-#define dY0iS32 d15.s32
-#define dY1r d16.s16
-#define dY1i d17.s16
-#define qY1 q8.s16
-#define dY1rS32 d16.s32
-#define dY1iS32 d17.s32
-#define dY0rS64 d14.s32
-#define dY0iS64 d15.s32
-#define qT2 q9.s32
-#define qT3 q10.s32
-#define d18s16 d18.s16
-#define d19s16 d19.s16
-#define d20s16 d20.s16
-#define d21s16 d21.s16
-@ lastThreeelements
-#define dX1 d3.s16
-#define dW0 d4.s16
-#define dW1 d5.s16
-#define dY0 d10.s16
-#define dY1 d11.s16
-#define dY2 d12.s16
-#define dY3 d13.s16
-
- @ Allocate stack memory required by the function
- M_ALLOC4 diffOnStack, 4
-
- @ Write function header
- M_START omxSP_FFTFwd_RToCCS_S16_Sfs,r11,d15
-
- @ Structure offsets for the FFTSpec
- .set ARMsFFTSpec_N, 0
- .set ARMsFFTSpec_pBitRev, 4
- .set ARMsFFTSpec_pTwiddle, 8
- .set ARMsFFTSpec_pBuf, 12
-
- @ Define stack arguments
-
- @ Read the size from structure and take log
- LDR N, [pFFTSpec, #ARMsFFTSpec_N]
-
- @ Read other structure parameters
- LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
- LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
-
- @ N = 1 Treat seperately
- CMP N,#1
- BGT sizeGreaterThanOne
- VLD1 dX0[0],[pSrc]
- RSB scale,scale,#0 @ for right shift by a variable
- MOV zero,#0
- VMOV dShift[0],scale
- VMOV dzero[0],zero
- VRSHL dX0,dShift
- VMOV dZero[0],zero
- VST3 {dX0[0],dzero[0],dZero[0]},[pDst]
-
- B End
-
-sizeGreaterThanOne:
- @ Do a N/2 point complex FFT including the scaling
-
- MOV N,N,ASR #1 @ N/2 point complex FFT
-
- CLZ order,N @ N = 2^order
- RSB order,order,#31
- MOV subFFTSize,#1
-
- CMP order,#3
- BGT orderGreaterthan3 @ order > 3
-
- CMP order,#1
- BGE orderGreaterthan0 @ order > 0
- M_STR scale, diffOnStack,LT @ order = 0
- LDR x0r,[pSrc]
- STR x0r,[pOut]
- MOV pSrc,pOut
- MOV argDst,pDst
- B FFTEnd
-
-orderGreaterthan0:
- @ set the buffers appropriately for various orders
- CMP order,#2
- MOVEQ argDst,pDst
- MOVNE argDst,pOut
- MOVNE pOut,pDst @ Pass 1st stage destination in RN5
- MOV argTwiddle,pTwiddle
-
- SUBS diff,scale,order
- M_STR diff,diffOnStack
- MOVGT scale,order
- @ Now scale <= order
-
- CMP order,#1
- BGT orderGreaterthan1
- @ order = 1:
- SUBS scale,scale,#1
- BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
- BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
- B FFTEnd
-
-orderGreaterthan1:
- CMP order,#2
- MOV argScale,scale
- BGT orderGreaterthan2
- @ order = 2:
- SUBS argScale,argScale,#1
- BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
- BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
- SUBS argScale,argScale,#1
- BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
- BLLT armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
- B FFTEnd
-
-orderGreaterthan2: @ order = 3
- SUBS argScale,argScale,#1
- BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
- BLLT armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
- SUBS argScale,argScale,#1
- BLGE armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
- BLLT armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
- SUBS argScale,argScale,#1
- BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
- BLLT armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
- B FFTEnd
-
-
-orderGreaterthan3:
- @ check scale = 0 or scale = order
- SUBS diff, scale, order @ scale > order
- MOVGT scale,order
- BGE specialScaleCase @ scale = 0 or scale = order
- CMP scale,#0
- BEQ specialScaleCase
- B generalScaleCase
-
-specialScaleCase: @ scale = 0, or, scale = order && order > 3
- TST order, #2 @ Set input args to fft stages
- MOVEQ argDst,pDst
- MOVNE argDst,pOut
- MOVNE pOut,pDst @ Pass the first stage destination in RN5
- MOV argTwiddle,pTwiddle
-
- CMP diff,#0
- M_STR diff, diffOnStack
- BGE scaleEqualsOrder
-
- @ check for even or odd order.
- @ NOTE: The following combination of BL's would work fine even though
- @ the first BL would corrupt the flags. This is because the end of the
- @ "grpZeroSetLoop" loop inside
- @ armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets Z flag to EQ.
-
- TST order,#0x00000001
- BLEQ armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
- BLNE armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
-
- CMP subFFTNum,#4
- BLT FFTEnd
-
-unscaledRadix4Loop:
- BEQ lastStageUnscaledRadix4
- BL armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
- CMP subFFTNum,#4
- B unscaledRadix4Loop
-
-lastStageUnscaledRadix4:
- BL armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
- B FFTEnd
-
-scaleEqualsOrder:
- @ check for even or odd order
- @ NOTE: The following combination of BL's would work fine even though
- @ the first BL would corrupt the flags. This is because the end of the
- @ "grpZeroSetLoop" loop inside
- @ armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets Z flag to EQ.
-
- TST order,#0x00000001
- BLEQ armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
- BLNE armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
-
- CMP subFFTNum,#4
- BLT FFTEnd
-
-scaledRadix4Loop:
- BEQ lastStageScaledRadix4
- BL armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
- CMP subFFTNum,#4
- B scaledRadix4Loop
-
-lastStageScaledRadix4:
- BL armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
- B FFTEnd
-
-generalScaleCase: @ 0 < scale < order and order > 3
- @ Determine the correct destination buffer
- SUB diff,order,scale
- TST diff,#0x01
- ADDEQ count,scale,diff,LSR #1 @ count = scale + (order - scale)/2
- MOVNE count,order
- TST count,#0x01 @ Is count even or odd ?
-
- MOVEQ argDst,pDst @ Set input args to fft stages
- MOVNE argDst,pOut
- MOVNE pOut,pDst @ Pass 1st stage destination in RN5
- MOV argTwiddle,pTwiddle
-
- CMP diff,#1
- M_STR diff, diffOnStack
- BEQ scaleps @ scaling including a radix2_ps stage
-
- MOV argScale,scale @ Put scale in RN4 to save and restore
- BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
- SUBS argScale,argScale,#1
-
-scaledRadix2Loop:
- BLGT armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
- SUBS argScale,argScale,#1 @ save, restore scale in scaled stages
- BGT scaledRadix2Loop
- B outScale
-
-scaleps:
- SUB argScale,scale,#1 @ order>3 and diff=1 => scale >= 3
- BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
- SUBS argScale,argScale,#1
-
-scaledRadix2psLoop:
- BEQ scaledRadix2psStage
- BLGT armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
- SUBS argScale,argScale,#1 @ save, restore scale in scaled stages
- BGE scaledRadix2psLoop
-
-scaledRadix2psStage:
- BL armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
- B generalLastStageUnscaledRadix2
-
-outScale:
- M_LDR diff, diffOnStack
- @check for even or odd order
- TST diff,#0x00000001
- BEQ generalUnscaledRadix4Loop
- B unscaledRadix2Loop
-
-generalUnscaledRadix4Loop:
- CMP subFFTNum,#4
- BEQ generalLastStageUnscaledRadix4
- BL armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
- B generalUnscaledRadix4Loop
-
-generalLastStageUnscaledRadix4:
- BL armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
- B End
-
-unscaledRadix2Loop:
- CMP subFFTNum,#4
- BEQ generalLastTwoStagesUnscaledRadix2
- BL armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
- B unscaledRadix2Loop
-
-generalLastTwoStagesUnscaledRadix2:
- BL armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
-generalLastStageUnscaledRadix2:
- BL armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
- B End
-
-FFTEnd: @ Does only the scaling
- M_LDR diff, diffOnStack
- CMP diff,#0
- BLE finalComplexToRealFixup
-
- RSB diff,diff,#0 @ for right shift by a variable
- VDUP qShift,diff
-
- @ save subFFTSize and use subFFTSizeTmp in the following loop
- MOV subFFTSizeTmp,subFFTSize @ subFFTSizeTmp same reg as subFFTNum
-
- @ Use parallel loads for bigger FFT size.
- CMP subFFTSizeTmp, #8
- BLT scaleLessFFTData
-
-scaleFFTData:
- VLD1 {qT0s, qT1s},[pSrc:256] @ pSrc contains pDst pointer
- SUBS subFFTSizeTmp,subFFTSizeTmp,#8
- VSHL qT0s,qShift
- VSHL qT1s,qShift
- VST1 {qT0s, qT1s},[pSrc:256]!
- BGT scaleFFTData
- B afterScaling
-
-scaleLessFFTData:
- VLD1 {dX0S32[0]},[pSrc] @ pSrc contains pDst pointer
- SUBS subFFTSizeTmp,subFFTSizeTmp,#1
- VSHL dX0,dShift
- VST1 {dX0S32[0]},[pSrc]!
- BGT scaleLessFFTData
-
-afterScaling:
- SUB pSrc,pSrc,subFFTSize,LSL #2 @ reset pSrc for final fixup
-
- @ change the logic so that output after scaling is in pOut and not in pDst
- @ finally store from pOut to pDst
- @ change branch "End" to branch "finalComplexToRealFixup" in the above
- @ chk the code below for multiplication by j factor
-
-finalComplexToRealFixup:
- @ F(0) = 1/2[Z(0) + Z'(0)] - j [Z(0) - Z'(0)]
- @ 1/2[(a+jb) + (a-jb)] - j [(a+jb) - (a-jb)]
- @ 1/2[2a+j0] - j [0+j2b]
- @ (a+b, 0)
-
- @ F(N/2) = 1/2[Z(0) + Z'(0)] + j [Z(0) - Z'(0)]
- @ 1/2[(a+jb) + (a-jb)] + j [(a+jb) - (a-jb)]
- @ 1/2[2a+j0] + j [0+j2b]
- @ (a-b, 0)
-
- CMP subFFTSize,#4
- BLE smallFFTSize
-
-@ SubSize > 3:
- @ F(0) and F(N/2)
- VLD2 {dX0r[0],dX0i[0]},[pSrc]!
- MOV zero,#0
- VMOV dX0r[1],zero
- MOV step,subFFTSize,LSL #2 @ step = N/2 * 4 bytes
- VMOV dX0i[1],zero
- SUB twStep,step,subFFTSize @ twStep = 3N/8 * 8 bytes
-
- VADD dY0r,dX0r,dX0i @ F(0) = ((Z0.r+Z0.i) , 0)
- MOV step1,subFFTSize,LSL #1 @ step1 = N/2 * 2 bytes
- VSUB dY0i,dX0r,dX0i @ F(N/2) = ((Z0.r-Z0.i) , 0)
- SUBS subFFTSize,subFFTSize,#2
-
- VST1 dY0rS32[0],[argDst], step
- ADD pTwiddleTmp,argTwiddle,#4 @ W^2
- VST1 dY0iS32[0],[argDst]!
- ADD argTwiddle,argTwiddle,twStep @ W^1
-
- VDUP dzero,zero
- SUB argDst,argDst,step
- SUB step,step,#20
- RSB stepr, step, #16
- SUB step1,step1,#8 @ (N/4-1)*8 bytes
- RSB step1r,step1,#8
-
- SUB step2, step1, #4
- RSB step2r, step2, #8
-
- @ F(k) = 1/2[Z(k) + Z'(N/2-k)] -j*W^(k) [Z(k) - Z'(N/2-k)]
- @ Note: W^k is stored as negative values in the table.
- @ Process 4 elements at a time. E.g: F(1),F(2) and F(N/2-2),F(N/2-1)
- @ since both of them require Z(1),Z(2) and Z(N/2-2),Z(N/2-1).
-
-evenOddButterflyLoop:
- VLD2 {dX0r,dX0i},[pSrc],step
- VLD2 {dX1r,dX1i},[pSrc],stepr
-
- VLD1 dW0r,[argTwiddle],step1
- SUB step1, step1, #16
- VREV64 qX1,qX1
-
- VLD1 dW1r,[argTwiddle],step1r
- ADD step1r, step1r, #16
- VSUB dT2,dX0r,dX1r @ a-c
-
- VLD1 dW0i,[pTwiddleTmp],step2
- SUB step2, step2, #16
- VADD dT3,dX0i,dX1i @ b+d
-
- VLD1 dW1i,[pTwiddleTmp],step2r
- ADD step2r, step2r, #16
-
- VTRN dW0r,dW0i
- VZIP dW1r, dW1i
-
- SUBS subFFTSize,subFFTSize,#8
-
- VHADD dT0,dX0r,dX1r @ (a+c)/2
- VZIP dW1iS32, dW1rS32
- VHSUB dT1,dX0i,dX1i @ (b-d)/2
-
- VQDMULH dY0,dW1i,dT2
- VQDMULH dY1,dW1r,dT3
- VQDMULH dY2,dW1i,dT3
- VQDMULH dY3,dW1r,dT2
-
- VQDMULH d18s16,dW0r,dT2
- VQDMULH d19s16,dW0i,dT3
- VQDMULH d20s16,dW0r,dT3
- VQDMULH d21s16,dW0i,dT2
-
- VRHADD dX1r, dY0, dY1
- VHSUB dX1i, dY2, dY3
- VHSUB dX0r, d18s16, d19s16
- VRHADD dX0i, d20s16, d21s16
-
- VADD dY1i,dT1,dX1r
- VSUB dY1r,dT0,dX1i @ F(N/2 -1)
-
- VSUB dY0r,dT0,dX0i @ F(1)
- VADD dY0i,dT1,dX0r
-
- VNEG dY1i,dY1i
- VREV64 qY1, qY1
-
- VST2 {dY0r,dY0i},[argDst],step
- SUB step,step,#32 @ (N/2-4)*4 bytes
- VST2 {dY1r,dY1i},[argDst],stepr
- ADD stepr,stepr,#32
-
- BGT evenOddButterflyLoop
-
- SUB pSrc,pSrc,#4 @ points to the last element.
- SUB argDst,argDst,#4 @ points to the last element.
-
- b lastElement
-
-smallFFTSize:
-
- @ F(0) and F(N/2)
- VLD2 {dX0r[0],dX0i[0]},[pSrc]!
- MOV zero,#0
- VMOV dX0r[1],zero
- MOV step,subFFTSize,LSL #2 @ step = N/2 * 4 bytes
- VMOV dX0i[1],zero
- SUB twStep,step,subFFTSize @ twStep = 3N/8 * 8 bytes
-
- VADD dY0r,dX0r,dX0i @ F(0) = ((Z0.r+Z0.i) , 0)
- MOV step1,subFFTSize,LSL #1 @ step1 = N/2 * 2 bytes
- VSUB dY0i,dX0r,dX0i @ F(N/2) = ((Z0.r-Z0.i) , 0)
- SUBS subFFTSize,subFFTSize,#2
-
-
- VST1 dY0rS32[0],[argDst], step
- ADD pTwiddleTmp,argTwiddle,#4 @ W^2
- VST1 dY0iS32[0],[argDst]!
- ADD argTwiddle,argTwiddle,twStep @ W^1
-
- VDUP dzero,zero
- SUB argDst,argDst,step
-
- BLT End
- BEQ lastElement
-
- SUB step,step,#12
- SUB step1,step1,#4 @ (N/4-1)*8 bytes
-
- @ F(k) = 1/2[Z(k) + Z'(N/2-k)] -j*W^(k) [Z(k) - Z'(N/2-k)]
-
-butterflyLoopSubFFTSize4:
- VLD1 dW0rS32[0], [argTwiddle],step1
- VLD1 dW1rS32[0],[argTwiddle]!
-
- VLD2 {dX0r[0],dX0i[0]},[pSrc]!
- VLD2 {dX0r[1],dX0i[1]},[pSrc],step
- SUB pSrc,pSrc,#4
- SUB argTwiddle,argTwiddle,step1
- VLD2 {dX1r[0],dX1i[0]},[pSrc]!
- VLD2 {dX1r[1],dX1i[1]},[pSrc]!
-
- SUB step1,step1,#4 @ (N/4-2)*4 bytes
- VLD1 dW0iS32[0],[pTwiddleTmp],step1
- VLD1 dW1iS32[0],[pTwiddleTmp]!
- SUB pSrc,pSrc,step
-
- SUB pTwiddleTmp,pTwiddleTmp,step1
- VREV32 dX1r,dX1r
- VREV32 dX1i,dX1i
- SUBS subFFTSize,subFFTSize,#4
-
- VSUB dT2,dX0r,dX1r @ a-c
- SUB step1,step1,#4
- VADD dT3,dX0i,dX1i @ b+d
- VADD dT0,dX0r,dX1r @ a+c
- VSUB dT1,dX0i,dX1i @ b-d
- VHADD dT0,dT0,dzero
- VHADD dT1,dT1,dzero
-
- VTRN dW1r,dW1i
- VTRN dW0r,dW0i
-
- VMULL qT0,dW1r,dT2
- VMLAL qT0,dW1i,dT3
- VMULL qT1,dW1r,dT3
- VMLSL qT1,dW1i,dT2
-
- VMULL qT2,dW0r,dT2
- VMLSL qT2,dW0i,dT3
- VMULL qT3,dW0r,dT3
- VMLAL qT3,dW0i,dT2
-
- VRSHRN dX1r,qT0,#16
- VRSHRN dX1i,qT1,#16
-
- VSUB dY1r,dT0,dX1i @ F(N/2 -1)
- VADD dY1i,dT1,dX1r
- VNEG dY1i,dY1i
-
- VREV32 dY1r,dY1r
- VREV32 dY1i,dY1i
-
- VRSHRN dX0r,qT2,#16
- VRSHRN dX0i,qT3,#16
-
- VSUB dY0r,dT0,dX0i @ F(1)
- VADD dY0i,dT1,dX0r
-
- VST2 {dY0r[0],dY0i[0]},[argDst]!
- VST2 {dY0r[1],dY0i[1]},[argDst],step
- SUB argDst, #4
- VST2 {dY1r[0],dY1i[0]},[argDst]!
- VST2 {dY1r[1],dY1i[1]},[argDst]!
- SUB argDst,argDst,step
- SUB step,step,#16 @ (N/2-4)*4 bytes
-
- BGT butterflyLoopSubFFTSize4
-
- SUB pSrc,pSrc,#4 @ points to the last element.
- SUB argDst,argDst,#4 @ points to the last element.
-
-lastElement:
- @ Last element can be expanded as follows
- @ 1/2[Z(k) + Z'(k)] + j w^k [Z(k) - Z'(k)]
- @ 1/2[(a+jb) + (a-jb)] + j w^k [(a+jb) - (a-jb)]
- @ 1/2[2a+j0] + j (c+jd) [0+j2b]
- @ (a-bc, -bd)
- @ Since (c,d) = (0,1) for the last element, result is just (a,-b)
-
- VLD1 dX0rS32[0],[pSrc]
- VST1 dX0r[0],[argDst]!
- VNEG dX0r,dX0r
- VST1 dX0r[1],[argDst]!
-
-End:
- @ Set return value
- MOV result, #OMX_Sts_NoErr
-
- @ Write function tail
- M_END
-
- .END
diff --git a/dl/sp/src/omxSP_FFTGetBufSize_R_S16.c b/dl/sp/src/omxSP_FFTGetBufSize_R_S16.c
deleted file mode 100644
index a61a374..0000000
--- a/dl/sp/src/omxSP_FFTGetBufSize_R_S16.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- *
- * Some code in this file was originally from file omxSP_FFTGetBufSize_R_S32.c
- * which was licensed as follows.
- * It has been relicensed with permission from the copyright holders.
- */
-
-/*
- * OpenMAX DL: v1.0.2
- * Last Modified Revision:
- * Last Modified Date:
- */
-
-#include "dl/api/armOMX.h"
-#include "dl/api/omxtypes.h"
-#include "dl/sp/api/armSP.h"
-#include "dl/sp/api/omxSP.h"
-
-/**
- * Function: omxSP_FFTGetBufSize_R_S16
- *
- * Description:
- * Computes the size of the specification structure required for the length
- * 2^order real FFT and IFFT functions.
- *
- * Remarks:
- * This function is used in conjunction with the 16-bit functions
- * <FFTFwd_RToCCS_S16_Sfs> and <FFTInv_CCSToR_S16_Sfs>.
- *
- * Parameters:
- * [in] order base-2 logarithm of the length; valid in the range
- * [0,12].
- * [out] pSize pointer to the number of bytes required for the
- * specification structure.
- *
- * Return Value:
- * Standard omxError result. See enumeration for possible result codes.
- *
- */
-
-OMXResult omxSP_FFTGetBufSize_R_S16(
- OMX_INT order,
- OMX_INT *pSize
-) {
- OMX_INT NBy2,N,twiddleSize;
-
- /* Order zero not allowed */
- if (order == 0) {
- return OMX_Sts_BadArgErr;
- }
-
- NBy2 = 1 << (order - 1);
- N = NBy2 << 1;
- twiddleSize = 5 * N / 8; /* 3 / 4 (N / 2) + N / 4 */
-
- /* 2 pointers to store bitreversed array and twiddle factor array */
- *pSize = sizeof(ARMsFFTSpec_R_SC16)
- /* Twiddle factors */
- + sizeof(OMX_SC16) * twiddleSize
- /* Ping Pong buffer for doing the N/2 point complex FFT; */
- /* extra size 'N' as a temporary buf for FFTInv_CCSToR_S16_Sfs */
- + sizeof(OMX_S16) * (N << 1)
- /* Extra bytes to get 32 byte alignment of ptwiddle and pBuf */
- + 62 ;
-
-
- return OMX_Sts_NoErr;
-}
-
-/*****************************************************************************
- * END OF FILE
- *****************************************************************************/
-
diff --git a/dl/sp/src/omxSP_FFTInit_R_S16.c b/dl/sp/src/omxSP_FFTInit_R_S16.c
deleted file mode 100644
index 3a12167..0000000
--- a/dl/sp/src/omxSP_FFTInit_R_S16.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- *
- * Some code in this file was originally from file omxSP_FFTInit_R_S16S32.c
- * which was licensed as follows.
- * It has been relicensed with permission from the copyright holders.
- */
-
-/*
- * OpenMAX DL: v1.0.2
- * Last Modified Revision:
- * Last Modified Date:
- *
- * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
- */
-
-#include "dl/api/armOMX.h"
-#include "dl/api/omxtypes.h"
-#include "dl/sp/api/armSP.h"
-#include "dl/sp/api/omxSP.h"
-
-/**
- * Function: omxSP_FFTInit_R_S16
- *
- * Description:
- * Initialize the real forward-FFT specification information struct.
- *
- * Remarks:
- * This function is used to initialize the specification structures
- * for functions <ippsFFTFwd_RToCCS_S16_Sfs> and
- * <ippsFFTInv_CCSToR_S16_Sfs>. Memory for *pFFTSpec must be
- * allocated prior to calling this function. The number of bytes
- * required for *pFFTSpec can be determined using
- * <FFTGetBufSize_R_S16>.
- *
- * Parameters:
- * [in] order base-2 logarithm of the desired block length;
- * valid in the range [0,12].
- * [out] pFFTFwdSpec pointer to the initialized specification structure.
- *
- * Return Value:
- * Standard omxError result. See enumeration for possible result codes.
- *
- */
-
-OMXResult omxSP_FFTInit_R_S16(
- OMXFFTSpec_R_S16* pFFTSpec,
- OMX_INT order
-) {
- OMX_INT i = 0, j = 0;
- OMX_SC16 *pTwiddle = NULL, *pTwiddle1 = NULL, *pTwiddle2 = NULL;
- OMX_SC16 *pTwiddle3 = NULL, *pTwiddle4 = NULL;
- OMX_S16 *pBuf = NULL;
- OMX_U16 *pBitRev = NULL;
- OMX_U32 pTmp = 0;
- OMX_INT Nby2 = 0, N = 0, M = 0, diff = 0, step = 0;
- OMX_S16 x = 0, y = 0, xNeg = 0;
- OMX_S32 xS32 = 0, yS32 = 0;
- ARMsFFTSpec_R_SC16 *pFFTStruct = NULL;
-
- /* Order zero not allowed */
- if (order == 0) {
- return OMX_Sts_BadArgErr;
- }
-
- /* Do the initializations */
- pFFTStruct = (ARMsFFTSpec_R_SC16*) pFFTSpec;
- Nby2 = 1 << (order - 1);
- N = Nby2 << 1;
- pBitRev = NULL ; /* optimized implementations don't use bitreversal */
- pTwiddle = (OMX_SC16*) (sizeof(ARMsFFTSpec_R_SC16) + (OMX_S8*)pFFTSpec);
-
- /* Align to 32 byte boundary */
- pTmp = ((OMX_U32)pTwiddle)&31; /* (OMX_U32)pTwiddle % 32 */
- if(pTmp != 0) {
- pTwiddle = (OMX_SC16*) ((OMX_S8*)pTwiddle + (32 - pTmp));
- }
-
- pBuf = (OMX_S16*) (sizeof(OMX_SC16) * (5 * N / 8) + (OMX_S8*)pTwiddle);
-
- /* Align to 32 byte boundary */
- pTmp = ((OMX_U32)pBuf)&31; /* (OMX_U32)pBuf % 32 */
- if(pTmp != 0) {
- pBuf = (OMX_SC16*)((OMX_S8*)pBuf + (32 - pTmp));
- }
-
- /*
- * Filling Twiddle factors : exp^(-j*2*PI*k/ (N/2) ) ; k=0,1,2,...,3/4(N/2).
- * N/2 point complex FFT is used to compute N point real FFT.
- * The original twiddle table "armSP_FFT_S32TwiddleTable" is of size
- * (MaxSize/8 + 1). Rest of the values i.e., up to MaxSize are calculated
- * using the symmetries of sin and cos.
- * The max size of the twiddle table needed is 3/4(N/2) for a radix-4 stage.
- *
- * W = (-2 * PI) / N
- * N = 1 << order
- * W = -PI >> (order - 1)
- *
- * Note we use S32 twiddle factor table and round the values to 16 bits.
- */
-
- M = Nby2 >> 3;
- diff = 12 - (order - 1);
- step = 1 << diff; /* Step into the twiddle table for the current order */
-
- xS32 = armSP_FFT_S32TwiddleTable[0];
- yS32 = armSP_FFT_S32TwiddleTable[1];
- x = (xS32 + 0x8000) >> 16;
- y = (yS32 + 0x8000) >> 16;
- xNeg = 0x7FFF;
-
- if((order-1) >= 3) {
- /* i = 0 case */
- pTwiddle[0].Re = x;
- pTwiddle[0].Im = y;
- pTwiddle[2*M].Re = -y;
- pTwiddle[2*M].Im = xNeg;
- pTwiddle[4*M].Re = xNeg;
- pTwiddle[4*M].Im = y;
-
- for (i=1; i<=M; i++){
- OMX_S16 x_neg = 0, y_neg = 0;
- j = i * step;
-
- xS32 = armSP_FFT_S32TwiddleTable[2 * j];
- yS32 = armSP_FFT_S32TwiddleTable[2 * j + 1];
- x = (xS32 + 0x8000) >> 16;
- y = (yS32 + 0x8000) >> 16;
- /* |x_neg = -x| doesn't work when x is 0x8000. */
- x_neg = (-(xS32 + 0x8000)) >> 16;
- y_neg = (-(yS32 + 0x8000)) >> 16;
-
- pTwiddle[i].Re = x;
- pTwiddle[i].Im = y;
- pTwiddle[2* M- i].Re = y_neg;
- pTwiddle[2* M- i].Im = x_neg;
- pTwiddle[2* M+ i].Re = y;
- pTwiddle[2* M+ i].Im = x_neg;
- pTwiddle[4* M- i].Re = x_neg;
- pTwiddle[4* M- i].Im = y;
- pTwiddle[4* M+ i].Re = x_neg;
- pTwiddle[4* M+ i].Im = y_neg;
- pTwiddle[6* M- i].Re = y;
- pTwiddle[6* M- i].Im = x;
- }
- }
- else {
- if ((order - 1) == 2) {
- pTwiddle[0].Re = x;
- pTwiddle[0].Im = y;
- pTwiddle[1].Re = -y;
- pTwiddle[1].Im = xNeg;
- pTwiddle[2].Re = xNeg;
- pTwiddle[2].Im = y;
- }
- if ((order-1) == 1) {
- pTwiddle[0].Re = x;
- pTwiddle[0].Im = y;
- }
- }
-
- /*
- * Now fill the last N/4 values : exp^(-j*2*PI*k/N); k=1,3,5,...,N/2-1.
- * These are used for the final twiddle fix-up for converting complex to
- * real FFT.
- */
-
- M = N >> 3;
- diff = 12 - order;
- step = 1 << diff;
-
- pTwiddle1 = pTwiddle + 3 * N / 8;
- pTwiddle4 = pTwiddle1 + (N / 4 - 1);
- pTwiddle3 = pTwiddle1 + N / 8;
- pTwiddle2 = pTwiddle1 + (N / 8 - 1);
-
- xS32 = armSP_FFT_S32TwiddleTable[0];
- yS32 = armSP_FFT_S32TwiddleTable[1];
- x = (xS32 + 0x8000) >> 16;
- y = (yS32 + 0x8000) >> 16;
- xNeg = 0x7FFF;
-
- if((order) >= 3) {
- for (i = 1; i <= M; i += 2 ) {
- OMX_S16 x_neg = 0, y_neg = 0;
-
- j = i*step;
-
- xS32 = armSP_FFT_S32TwiddleTable[2 * j];
- yS32 = armSP_FFT_S32TwiddleTable[2 * j + 1];
- x = (xS32 + 0x8000) >> 16;
- y = (yS32 + 0x8000) >> 16;
- /* |x_neg = -x| doesn't work when x is 0x8000. */
- x_neg = (-(xS32 + 0x8000)) >> 16;
- y_neg = (-(yS32 + 0x8000)) >> 16;
-
- pTwiddle1[0].Re = x;
- pTwiddle1[0].Im = y;
- pTwiddle1 += 1;
- pTwiddle2[0].Re = y_neg;
- pTwiddle2[0].Im = x_neg;
- pTwiddle2 -= 1;
- pTwiddle3[0].Re = y;
- pTwiddle3[0].Im = x_neg;
- pTwiddle3 += 1;
- pTwiddle4[0].Re = x_neg;
- pTwiddle4[0].Im = y;
- pTwiddle4 -= 1;
- }
- }
- else {
- if (order == 2) {
- pTwiddle1[0].Re = -y;
- pTwiddle1[0].Im = xNeg;
- }
- }
-
- /* Update the structure */
- pFFTStruct->N = N;
- pFFTStruct->pTwiddle = pTwiddle;
- pFFTStruct->pBitRev = pBitRev;
- pFFTStruct->pBuf = pBuf;
-
- return OMX_Sts_NoErr;
-}
-/*****************************************************************************
- * END OF FILE
- *****************************************************************************/
-
diff --git a/dl/sp/src/omxSP_FFTInv_CCSToR_S16_Sfs_s.S b/dl/sp/src/omxSP_FFTInv_CCSToR_S16_Sfs_s.S
deleted file mode 100644
index 805623c..0000000
--- a/dl/sp/src/omxSP_FFTInv_CCSToR_S16_Sfs_s.S
+++ /dev/null
@@ -1,397 +0,0 @@
-@
-@ Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
-@
-@ Use of this source code is governed by a BSD-style license
-@ that can be found in the LICENSE file in the root of the source
-@ tree. An additional intellectual property rights grant can be found
-@ in the file PATENTS. All contributing project authors may
-@ be found in the AUTHORS file in the root of the source tree.
-@
-@ Some code in this file was originally from file
-@ omxSP_FFTInv_CToC_SC16_Sfs_s.S which was licensed as follows.
-@ It has been relicensed with permission from the copyright holders.
-@
-
-@
-@ File Name: omxSP_FFTInv_CToC_SC16_Sfs_s.s
-@ OpenMAX DL: v1.0.2
-@ Last Modified Revision: 6729
-@ Last Modified Date: Tue, 17 Jul 2007
-@
-@ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
-@
-
-@
-@ Description:
-@ Compute an inverse FFT for a 16-bit real signal, with complex FFT routines.
-@
-
-#include "dl/api/armCOMM_s.h"
-#include "dl/api/omxtypes_s.h"
-
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
-.extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
-
-@Input Registers
-#define pSrc r0
-#define pDst r1
-#define pFFTSpec r2
-#define scale r3
-
-@ Output registers
-#define result r0
-
-@Local Scratch Registers
-#define argTwiddle r1
-#define argDst r2
-#define argScale r4
-#define pTwiddle r4
-#define tmpOrder r4
-#define pOut r5
-#define subFFTSize r7
-#define subFFTNum r6
-#define N r6
-#define order r14
-#define diff r9
-@ Total num of radix stages to comple the FFT
-#define count r8
-#define x0r r4
-#define x0i r5
-#define diffMinusOne r2
-#define round r3
-#define pOut1 r2
-#define size r7
-#define step r8
-#define step1 r9
-#define twStep r10
-#define pTwiddleTmp r11
-#define argTwiddle1 r12
-#define zero r14
-
-@ Neon registers
-#define dX0 D0.S32
-#define dShift D1.S32
-#define qShift Q0.s16
-#define dX1 D1.S32
-#define dY0 D2.S32
-#define dY1 D3.S32
-#define dX0r D0.S32
-#define dX0i D1.S32
-#define dX1r D2.S32
-#define dX1i D3.S32
-#define dW0r D4.S32
-#define dW0i D5.S32
-#define dW1r D6.S32
-#define dW1i D7.S32
-#define dT0 D8.S32
-#define dT1 D9.S32
-#define dT2 D10.S32
-#define dT3 D11.S32
-#define qT0 Q6.S64
-#define qT1 Q7.S64
-#define qT0s Q6.S16
-#define qT1s Q7.S16
-#define qT2 Q8.S64
-#define qT3 Q9.S64
-#define dY0r D4.S32
-#define dY0i D5.S32
-#define dY1r D6.S32
-#define dY1i D7.S32
-#define dzero D20.S32
-#define dY2 D4.S32
-#define dY3 D5.S32
-#define dW0 D6.S32
-#define dW1 D7.S32
-#define dW0Tmp D10.S32
-#define dW1Neg D11.S32
-
-
-
- @ Allocate stack memory required by the function
- M_ALLOC4 diffOnStack, 4
-
- @ Write function header
- M_START omxSP_FFTInv_CCSToR_S16_Sfs,r11,d15
-
-@ Structure offsets for the FFTSpec
- .set ARMsFFTSpec_N, 0
- .set ARMsFFTSpec_pBitRev, 4
- .set ARMsFFTSpec_pTwiddle, 8
- .set ARMsFFTSpec_pBuf, 12
-
- @ Define stack arguments
-
- @ Read the size from structure and take log
- LDR N, [pFFTSpec, #ARMsFFTSpec_N]
-
- @ Read other structure parameters
- LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
- LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
-
- @ N=1 Treat seperately
- CMP N,#1
- BGT sizeGreaterThanOne
- VLD1 dX0[0],[pSrc]
- RSB scale,scale,#0 @ To use VRSHL for right shift by a variable
- VMOV dShift[0],scale
- VRSHL dX0,dShift
- VST1 dX0[0],[pDst]
-
- B End
-
-sizeGreaterThanOne:
-
- @ Call the preTwiddle Radix2 stage before doing the complex IFFT
-
- @ The following conditional BL combination would work since
- @ evenOddButterflyLoop in the first call would set Z flag to zero
-
- CMP scale,#0
- BLEQ armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe
- BLGT armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe
-
-complexIFFT:
-
- ASR N,N,#1 @ N/2 point complex IFFT
- ADD pSrc,pOut,N,LSL #2 @ set pSrc as pOut1
-
- CLZ order,N @ N = 2^order
- RSB order,order,#31
- MOV subFFTSize,#1
-
- ADD scale,scale,order @ FFTInverse has a final scaling factor by N
-
- CMP order,#3
- BGT orderGreaterthan3 @ order > 3
-
- CMP order,#1
- BGE orderGreaterthan0 @ order > 0
- M_STR scale, diffOnStack,LT @ order = 0
- LDRLT x0r,[pSrc]
- STRLT x0r,[pDst]
- MOVLT pSrc,pDst
- BLT FFTEnd
-
-orderGreaterthan0:
- @ set the buffers appropriately for various orders
- CMP order,#2
- MOVNE argDst,pDst
- MOVEQ argDst,pOut
- MOVEQ pOut,pDst @ Pass the first stage destination in RN5
- MOV argTwiddle,pTwiddle
- @ Store the scale factor and scale at the end
- SUB diff,scale,order
- M_STR diff, diffOnStack
- BGE orderGreaterthan1
- BLLT armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @ order = 1
- B FFTEnd
-
-
-orderGreaterthan1:
- MOV tmpOrder,order @ tmpOrder = RN 4
- BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
- CMP tmpOrder,#2
- BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
- BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
- B FFTEnd
-
-
-
-
-orderGreaterthan3:
- @ check scale = 0 or scale = order
- SUBS diff, scale, order @ scale > order
- MOVGT scale,order
- BGE specialScaleCase @ scale = 0 or scale = order
- CMP scale,#0
- BEQ specialScaleCase
- B generalScaleCase
-
-specialScaleCase: @ scale = 0 or scale = order and order > 3
-
- TST order, #2 @ Set input args to fft stages
- MOVNE argDst,pDst
- MOVEQ argDst,pOut
- MOVEQ pOut,pDst @ Pass the first stage destination in RN5
- MOV argTwiddle,pTwiddle
-
- CMP diff,#0
- M_STR diff, diffOnStack
- BGE scaleEqualsOrder
-
- @check for even or odd order
- @ NOTE: The following combination of BL's would work fine eventhough the first
- @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
- @ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
-
- TST order,#0x00000001
- BLEQ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
- BLNE armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
-
- CMP subFFTNum,#4
- BLT FFTEnd
-
-unscaledRadix4Loop:
- BEQ lastStageUnscaledRadix4
- BL armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
- CMP subFFTNum,#4
- B unscaledRadix4Loop
-
-lastStageUnscaledRadix4:
- BL armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
- B FFTEnd
-
-scaleEqualsOrder:
- @check for even or odd order
- @ NOTE: The following combination of BL's would work fine eventhough the first
- @ BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
- @ armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
-
- TST order,#0x00000001
- BLEQ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
- BLNE armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
-
- CMP subFFTNum,#4
- BLT FFTEnd
-
-scaledRadix4Loop:
- BEQ lastStageScaledRadix4
- BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
- CMP subFFTNum,#4
- B scaledRadix4Loop
-
-lastStageScaledRadix4:
- BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
- B FFTEnd
-
-
-
-generalScaleCase: @ 0 < scale < order and order > 3
- @ Determine the correct destination buffer
- SUB diff,order,scale
- TST diff,#0x01
- ADDEQ count,scale,diff,LSR #1 @ count = scale + (order - scale)/2
- MOVNE count,order
- TST count,#0x01 @ Is count even or odd ?
-
- MOVNE argDst,pDst @ Set input args to fft stages
- MOVEQ argDst,pOut
- MOVEQ pOut,pDst @ Pass the first stage destination in RN5
- MOV argTwiddle,pTwiddle
-
- CMP diff,#1
- M_STR diff, diffOnStack
- BEQ scaleps @ scaling including a radix2_ps stage
-
- MOV argScale,scale @ Put scale in RN4 so as to save and restore
- BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @ scaled first stage
- SUBS argScale,argScale,#1
-
-scaledRadix2Loop:
- BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
- SUBS argScale,argScale,#1 @ save and restore scale (RN4) in the scaled stages
- BGT scaledRadix2Loop
- B outScale
-
-scaleps:
- SUB argScale,scale,#1 @ order>3 and diff=1 => scale >= 3
- BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @ scaled first stage
- SUBS argScale,argScale,#1
-
-scaledRadix2psLoop:
- BEQ scaledRadix2psStage
- BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
- SUBS argScale,argScale,#1 @ save and restore scale (RN4) in the scaled stages
- BGE scaledRadix2psLoop
-
-scaledRadix2psStage:
- BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
- B generalLastStageUnscaledRadix2
-
-
-outScale:
- M_LDR diff, diffOnStack
- @check for even or odd order
- TST diff,#0x00000001
- BEQ generalUnscaledRadix4Loop
- B unscaledRadix2Loop
-
-generalUnscaledRadix4Loop:
- CMP subFFTNum,#4
- BEQ generalLastStageUnscaledRadix4
- BL armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
- B generalUnscaledRadix4Loop
-
-generalLastStageUnscaledRadix4:
- BL armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
- B End
-
-unscaledRadix2Loop:
- CMP subFFTNum,#4
- BEQ generalLastTwoStagesUnscaledRadix2
- BL armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
- B unscaledRadix2Loop
-
-generalLastTwoStagesUnscaledRadix2:
- BL armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
-generalLastStageUnscaledRadix2:
- BL armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
- B End
-
-
-FFTEnd: @ Does only the scaling
-
- M_LDR diff, diffOnStack
- CMP diff,#0
- BLE End
-
- RSB diff,diff,#0 @ to use VRSHL for right shift by a variable
- VDUP qShift,diff
-
- @ Use parallel loads for bigger FFT size.
- CMP subFFTSize, #8
- BLT scaleLessFFTData
-
-scaleFFTData:
- VLD1 {qT0s, qT1s},[pSrc:256] @ pSrc contains pDst pointer
- SUBS subFFTSize,subFFTSize,#8
- VSHL qT0s,qShift
- VSHL qT1s,qShift
- VST1 {qT0s, qT1s},[pSrc:256]!
- BGT scaleFFTData
- B End
-
-scaleLessFFTData: @ N = subFFTSize ; dataptr = pDst ; scale = diff
- VLD1 {dX0[0]},[pSrc] @ pSrc contains pDst pointer
- SUBS subFFTSize,subFFTSize,#1
- VRSHL dX0,dShift
- VST1 {dX0[0]},[pSrc]!
- BGT scaleLessFFTData
-
-End:
- @ Set return value
- MOV result, #OMX_Sts_NoErr
-
- @ Write function tail
- M_END
-
-
-
-
-
-
- .END
diff --git a/dl/sp/src/test/test_fft.gyp b/dl/sp/src/test/test_fft.gyp
index 3290550..99b3774 100644
--- a/dl/sp/src/test/test_fft.gyp
+++ b/dl/sp/src/test/test_fft.gyp
@@ -67,19 +67,11 @@
],
},
{
- # Test real 16-bit fixed-point FFT implemented with S32 routines.
- 'target_name': 'test_rfft16_s32',
+ # Test real 16-bit fixed-point FFT
+ 'target_name': 'test_rfft16',
'type': 'executable',
'sources': [
- 'test_rfft16_s32.c',
- ],
- },
- {
- # Test real 16-bit fixed-point FFT implemented with S16 routines.
- 'target_name': 'test_rfft16_s16',
- 'type': 'executable',
- 'sources': [
- 'test_rfft16_s16.c',
+ 'test_rfft16.c',
],
},
{
@@ -115,8 +107,7 @@
'test_fft32',
'test_float_fft',
'test_float_rfft',
- 'test_rfft16_s32',
- 'test_rfft16_s16',
+ 'test_rfft16',
'test_rfft32',
'test_fft_time',
],
diff --git a/dl/sp/src/test/test_fft16.c b/dl/sp/src/test/test_fft16.c
index bedf278..081bf23 100644
--- a/dl/sp/src/test/test_fft16.c
+++ b/dl/sp/src/test/test_fft16.c
@@ -24,7 +24,7 @@
#define MAX_FFT_ORDER 12
int verbose = 0;
-int signal_value = 32767;
+int signal_value = 1024;
int scale_factor = 0;
struct KnownTestFailures known_failures[] = {
diff --git a/dl/sp/src/test/test_fft_time.c b/dl/sp/src/test/test_fft_time.c
index 42431bb..a401594 100644
--- a/dl/sp/src/test/test_fft_time.c
+++ b/dl/sp/src/test/test_fft_time.c
@@ -20,14 +20,9 @@
#include "dl/sp/src/test/aligned_ptr.h"
#include "dl/sp/src/test/gensig.h"
-#define MAX_FFT_ORDER TWIDDLE_TABLE_ORDER
+#define MAX_FFT_ORDER TWIDDLE_TABLE_ORDER
#define MAX_FFT_ORDER_FIXED_POINT 12
-typedef enum {
- rfft16_s16,
- rfft16_s16s32,
-} rfft16_type;
-
void TimeOneFloatFFT(int count, int fft_log_size, float signal_value,
int signal_type);
void TimeFloatFFT(int count, float signal_value, int signal_type);
@@ -38,7 +33,7 @@
int signal_type);
void TimeSC32FFT(int count, float signal_value, int signal_type);
void TimeOneRFFT16(int count, int fft_log_size, float signal_value,
- int signal_type, rfft16_type rfft16_type_selection);
+ int signal_type);
void TimeRFFT16(int count, float signal_value, int signal_type);
void TimeOneRFFT32(int count, int fft_log_size, float signal_value,
int signal_type);
@@ -104,7 +99,7 @@
void main(int argc, char* argv[]) {
int fft_log_size = 4;
- float signal_value = 32767;
+ float signal_value = 1024;
int signal_type = 0;
int test_mode = 1;
int count = 100;
@@ -195,8 +190,7 @@
TimeOneSC32FFT(count, fft_log_size, signal_value, signal_type);
break;
case 3:
- TimeOneRFFT16(count, fft_log_size, signal_value, signal_type, rfft16_s16s32);
- TimeOneRFFT16(count, fft_log_size, signal_value, signal_type, rfft16_s16);
+ TimeOneRFFT16(count, fft_log_size, signal_value, signal_type);
break;
case 4:
TimeOneRFFT32(count, fft_log_size, signal_value, signal_type);
@@ -672,12 +666,8 @@
free(true_fft);
}
-/* Argument rfft16_type_selection:
- * rfft16_s16s32: Calculate RFFT16 with 32 bit complex FFT;
- * otherwise: Calculate RFFT16 with 16 bit complex FFT.
- */
void TimeOneRFFT16(int count, int fft_log_size, float signal_value,
- int signal_type, rfft16_type rfft16_type_selection) {
+ int signal_type) {
OMX_S16* x;
OMX_S32* y;
OMX_S16* z;
@@ -699,8 +689,8 @@
OMX_INT n, fft_spec_buffer_size;
OMXResult status;
- OMXFFTSpec_R_S16 * fft_fwd_spec = NULL;
- OMXFFTSpec_R_S16 * fft_inv_spec = NULL;
+ OMXFFTSpec_R_S16S32 * fft_fwd_spec = NULL;
+ OMXFFTSpec_R_S16S32 * fft_inv_spec = NULL;
int fft_size;
struct timeval start_time;
struct timeval end_time;
@@ -738,20 +728,13 @@
GenerateRealFloatSignal(xr, (OMX_FC32*) yrTrue, fft_size, signal_type,
signal_value);
- if(rfft16_type_selection == rfft16_s16s32) {
- status = omxSP_FFTGetBufSize_R_S16S32(fft_log_size, &fft_spec_buffer_size);
- fft_fwd_spec = malloc(fft_spec_buffer_size);
- fft_inv_spec = malloc(fft_spec_buffer_size);
- status = omxSP_FFTInit_R_S16S32(fft_fwd_spec, fft_log_size);
- status = omxSP_FFTInit_R_S16S32(fft_inv_spec, fft_log_size);
- }
- else {
- status = omxSP_FFTGetBufSize_R_S16(fft_log_size, &fft_spec_buffer_size);
- fft_fwd_spec = malloc(fft_spec_buffer_size);
- fft_inv_spec = malloc(fft_spec_buffer_size);
- status = omxSP_FFTInit_R_S16(fft_fwd_spec, fft_log_size);
- status = omxSP_FFTInit_R_S16(fft_inv_spec, fft_log_size);
- }
+ status = omxSP_FFTGetBufSize_R_S16S32(fft_log_size, &fft_spec_buffer_size);
+
+ fft_fwd_spec = (OMXFFTSpec_R_S16S32*) malloc(fft_spec_buffer_size);
+ fft_inv_spec = (OMXFFTSpec_R_S16S32*) malloc(fft_spec_buffer_size);
+ status = omxSP_FFTInit_R_S16S32(fft_fwd_spec, fft_log_size);
+
+ status = omxSP_FFTInit_R_S16S32(fft_inv_spec, fft_log_size);
if (do_forward_test) {
if (include_conversion) {
@@ -774,14 +757,9 @@
temp16[n] = factor * xr[n];
}
- if(rfft16_type_selection == rfft16_s16s32) {
- status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y,
- (OMXFFTSpec_R_S16S32*)fft_fwd_spec, (OMX_INT) scaleFactor);
- }
- else {
- status = omxSP_FFTFwd_RToCCS_S16_Sfs(x, y,
- (OMXFFTSpec_R_S16*)fft_fwd_spec, (OMX_INT) scaleFactor);
- }
+ status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec,
+ (OMX_INT) scaleFactor);
+
/*
* Now spend some time converting the fixed-point FFT back to float.
*/
@@ -796,26 +774,15 @@
GetUserTime(&start_time);
for (n = 0; n < count; ++n) {
- if(rfft16_type_selection == rfft16_s16s32) {
- status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y,
- (OMXFFTSpec_R_S16S32*)fft_fwd_spec, (OMX_INT) scaleFactor);
- }
- else {
- status = omxSP_FFTFwd_RToCCS_S16_Sfs(x, y,
- (OMXFFTSpec_R_S16*)fft_fwd_spec, (OMX_INT) scaleFactor);
- }
+ status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec,
+ (OMX_INT) scaleFactor);
}
GetUserTime(&end_time);
}
elapsed_time = TimeDifference(&start_time, &end_time);
- if(rfft16_type_selection == rfft16_s16s32) {
- PrintResult("Forward RFFT16 (with rfft16_s16s32)", fft_log_size, elapsed_time, count);
- }
- else {
- PrintResult("Forward RFFT16 (with rfft16_s16)", fft_log_size, elapsed_time, count);
- }
+ PrintResult("Forward RFFT16", fft_log_size, elapsed_time, count);
}
if (do_inverse_test) {
@@ -837,14 +804,9 @@
temp32[n] = factor * yrTrue[n];
}
- if(rfft16_type_selection == rfft16_s16s32) {
- status = omxSP_FFTInv_CCSToR_S32S16_Sfs(y, z,
- (OMXFFTSpec_R_S16S32*)fft_inv_spec, 0);
- }
- else {
- status = omxSP_FFTInv_CCSToR_S16_Sfs(y, z,
- (OMXFFTSpec_R_S16*)fft_inv_spec, 0);
- }
+ /* Inverse path: run the inverse transform, same call as the timed loop. */
+ status = omxSP_FFTInv_CCSToR_S32S16_Sfs(y, z, fft_inv_spec, 0);
+
/*
* Spend some time converting the result back to float
*/
@@ -857,26 +819,14 @@
} else {
GetUserTime(&start_time);
for (n = 0; n < count; ++n) {
- if(rfft16_type_selection == rfft16_s16s32) {
- status = omxSP_FFTInv_CCSToR_S32S16_Sfs(y, z,
- (OMXFFTSpec_R_S16S32*)fft_inv_spec, 0);
- }
- else {
- status = omxSP_FFTInv_CCSToR_S16_Sfs(y, z,
- (OMXFFTSpec_R_S16*)fft_inv_spec, 0);
- }
+ status = omxSP_FFTInv_CCSToR_S32S16_Sfs(y, z, fft_inv_spec, 0);
}
GetUserTime(&end_time);
}
elapsed_time = TimeDifference(&start_time, &end_time);
- if(rfft16_type_selection == rfft16_s16s32) {
- PrintResult("Inverse RFFT16 (with rfft16_s16s32)", fft_log_size, elapsed_time, count);
- }
- else {
- PrintResult("Inverse RFFT16 (with rfft16_s16)", fft_log_size, elapsed_time, count);
- }
+ PrintResult("Inverse RFFT16", fft_log_size, elapsed_time, count);
}
FreeAlignedPointer(x_aligned);
@@ -893,18 +843,13 @@
int k;
int max_order = (max_fft_order > MAX_FFT_ORDER_FIXED_POINT)
? MAX_FFT_ORDER_FIXED_POINT : max_fft_order;
- if (verbose == 0)
- printf("RFFT16 (with rfft16_s16s32)\n");
- for (k = min_fft_order; k <= max_order; ++k) {
- int testCount = ComputeCount(count, k);
- TimeOneRFFT16(testCount, k, signal_value, signal_type, 1);
- }
if (verbose == 0)
- printf("RFFT16 (with rfft16_s16)\n");
+ printf("RFFT16\n");
+
for (k = min_fft_order; k <= max_order; ++k) {
int testCount = ComputeCount(count, k);
- TimeOneRFFT16(testCount, k, signal_value, signal_type, 0);
+ TimeOneRFFT16(testCount, k, signal_value, signal_type);
}
}
diff --git a/dl/sp/src/test/test_float_rfft.c b/dl/sp/src/test/test_float_rfft.c
index 20b5e33..cb3262f 100644
--- a/dl/sp/src/test/test_float_rfft.c
+++ b/dl/sp/src/test/test_float_rfft.c
@@ -36,6 +36,8 @@
SetDefaultOptions(&options, 1, MAX_FFT_ORDER);
+ options.signal_value_ = 1024;
+
ProcessCommandLine(&options, argc, argv,
"Test forward and inverse real floating-point FFT\n");
diff --git a/dl/sp/src/test/test_rfft16_s32.c b/dl/sp/src/test/test_rfft16.c
similarity index 97%
rename from dl/sp/src/test/test_rfft16_s32.c
rename to dl/sp/src/test/test_rfft16.c
index f0e86e4..171ccdc 100644
--- a/dl/sp/src/test/test_rfft16_s32.c
+++ b/dl/sp/src/test/test_rfft16.c
@@ -33,8 +33,8 @@
SetDefaultOptions(&options, 1, MAX_FFT_ORDER);
- ProcessCommandLine(&options, argc, argv, "Test forward and inverse real 16 \
- -bit fixed-point FFT, with 32-bit complex FFT routines\n");
+ ProcessCommandLine(&options, argc, argv,
+ "Test forward and inverse real 16-bit fixed-point FFT\n");
verbose = options.verbose_;
signal_value = options.signal_value_;
@@ -54,6 +54,7 @@
info.known_failures_ = 0;
info.forward_threshold_ = 90.12;
info.inverse_threshold_ = 89.28;
+ signal_value = 32767;
RunAllTests(&info);
} else {
TestFFT(options.fft_log_size_,
diff --git a/dl/sp/src/test/test_rfft16_s16.c b/dl/sp/src/test/test_rfft16_s16.c
deleted file mode 100644
index 9a9bc12..0000000
--- a/dl/sp/src/test/test_rfft16_s16.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <unistd.h>
-
-#include "dl/sp/api/armSP.h"
-#include "dl/sp/api/omxSP.h"
-#include "dl/sp/src/test/aligned_ptr.h"
-#include "dl/sp/src/test/compare.h"
-#include "dl/sp/src/test/gensig.h"
-#include "dl/sp/src/test/test_util.h"
-
-#define MAX_FFT_ORDER 12
-
-int verbose = 0;
-int signal_value = 32767;
-int scale_factor = 0;
-
-void TestFFT(int fftLogSize, int scale_factor, int signalType);
-
-void main(int argc, char* argv[]) {
- struct Options options;
-
- SetDefaultOptions(&options, 1, MAX_FFT_ORDER);
-
- options.signal_value_ = signal_value;
- options.scale_factor_ = scale_factor;
-
- ProcessCommandLine(&options, argc, argv, "Test forward and inverse real 16 \
- -bit fixed-point FFT, with 16-bit complex FFT routines\n");
-
- verbose = options.verbose_;
- signal_value = options.signal_value_;
- scale_factor = options.scale_factor_;
-
- if (verbose > 255)
- DumpOptions(stderr, &options);
-
- if (options.test_mode_) {
- struct TestInfo info;
-
- info.real_only_ = options.real_only_;
- info.max_fft_order_ = options.max_fft_order_;
- info.min_fft_order_ = options.min_fft_order_;
- info.do_forward_tests_ = options.do_forward_tests_;
- info.do_inverse_tests_ = options.do_inverse_tests_;
- /* No known failures */
- info.known_failures_ = 0;
- info.forward_threshold_ = 45;
- info.inverse_threshold_ = 14;
-
- RunAllTests(&info);
- } else {
- TestFFT(options.fft_log_size_,
- options.signal_type_,
- options.scale_factor_);
- }
-}
-
-void GenerateSignal(struct ComplexFloat* fft,
- float* x_true, int size, int sigtype) {
- int k;
- struct ComplexFloat *test_signal;
-
- test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size);
- GenerateTestSignalAndFFT(test_signal, fft, size, sigtype, signal_value, 1);
-
- /*
- * Convert the complex result to what we want
- */
-
- for (k = 0; k < size; ++k) {
- x_true[k] = test_signal[k].Re;
- }
-
- free(test_signal);
-}
-
-void TestFFT(int fft_log_size, int signal_type, int scale_factor) {
- struct SnrResult snr;
-
- RunOneForwardTest(fft_log_size, signal_type, signal_value, &snr);
- printf("Forward float FFT\n");
- printf("SNR: real part %f dB\n", snr.real_snr_);
- printf(" imag part %f dB\n", snr.imag_snr_);
- printf(" complex part %f dB\n", snr.complex_snr_);
-
- RunOneInverseTest(fft_log_size, signal_type, signal_value, &snr);
- printf("Inverse float FFT\n");
- printf("SNR: %f dB\n", snr.real_snr_);
-}
-
-float RunOneForwardTest(int fft_log_size, int signal_type,
- float unused_signal_value,
- struct SnrResult* snr) {
- OMX_S16* x;
- OMX_SC16* y;
-
- struct AlignedPtr* x_aligned;
- struct AlignedPtr* y_aligned;
-
- float* x_true;
- struct ComplexFloat* y_true;
- OMX_SC16* y_scaled;
-
- OMX_INT n, fft_spec_buffer_size;
- OMXResult status;
- OMXFFTSpec_R_S16 * fft_fwd_spec = NULL;
- int fft_size;
-
- /*
- * To get good FFT results, set the forward FFT scale factor
- * to be the same as the order.
- */
- scale_factor = fft_log_size;
-
- fft_size = 1 << fft_log_size;
-
- status = omxSP_FFTGetBufSize_R_S16(fft_log_size, &fft_spec_buffer_size);
- if (verbose > 63) {
- printf("fft_spec_buffer_size = %d\n", fft_spec_buffer_size);
- }
-
- fft_fwd_spec = (OMXFFTSpec_R_S16*) malloc(fft_spec_buffer_size);
- status = omxSP_FFTInit_R_S16(fft_fwd_spec, fft_log_size);
- if (status) {
- fprintf(stderr, "Failed to init forward FFT: status = %d\n", status);
- exit(1);
- }
-
- x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size);
- y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2));
-
- x = x_aligned->aligned_pointer_;
- y = y_aligned->aligned_pointer_;
-
- x_true = (float*) malloc(sizeof(*x_true) * fft_size);
- y_true = (struct ComplexFloat*) malloc(sizeof(*y_true) * (fft_size / 2 + 1));
- y_scaled = (OMX_SC16*) malloc(sizeof(*y_true) * (fft_size / 2 + 1));
-
- GenerateSignal(y_true, x_true, fft_size, signal_type);
- for (n = 0; n < fft_size; ++n) {
- x[n] = 0.5 + x_true[n];
- }
-
- {
- float scale = 1 << fft_log_size;
-
- for (n = 0; n < fft_size; ++n) {
- y_scaled[n].Re = 0.5 + y_true[n].Re / scale;
- y_scaled[n].Im = 0.5 + y_true[n].Im / scale;
- }
- }
-
- if (verbose > 63) {
- printf("Signal\n");
- DumpArrayReal16("x", fft_size, x);
-
- printf("Expected FFT output\n");
- DumpArrayComplex16("y", fft_size / 2 + 1, y_scaled);
- }
-
- status = omxSP_FFTFwd_RToCCS_S16_Sfs(x, (OMX_S16*) y, fft_fwd_spec, scale_factor);
- if (status) {
- fprintf(stderr, "Forward FFT failed: status = %d\n", status);
- exit(1);
- }
-
- if (verbose > 63) {
- printf("FFT Output\n");
- DumpArrayComplex16("y", fft_size / 2 + 1, y);
- }
-
- CompareComplex16(snr, y, y_scaled, fft_size / 2 + 1);
-
- FreeAlignedPointer(x_aligned);
- FreeAlignedPointer(y_aligned);
- free(fft_fwd_spec);
-
- return snr->complex_snr_;
-}
-
-float RunOneInverseTest(int fft_log_size, int signal_type,
- float unused_signal_value,
- struct SnrResult* snr) {
- OMX_S16* x_scaled;
- OMX_S16* z;
- OMX_SC16* y;
- OMX_SC16* y_scaled;
-
- struct AlignedPtr* y_aligned;
- struct AlignedPtr* z_aligned;
-
- float* x_true;
- struct ComplexFloat* y_true;
-
- OMX_INT n, fft_spec_buffer_size;
- OMXResult status;
- OMXFFTSpec_R_S16 * fft_inv_spec = NULL;
- int fft_size;
-
- fft_size = 1 << fft_log_size;
-
- status = omxSP_FFTGetBufSize_R_S16(fft_log_size, &fft_spec_buffer_size);
- if (verbose > 3) {
- printf("fft_spec_buffer_size = %d\n", fft_spec_buffer_size);
- }
-
- fft_inv_spec = (OMXFFTSpec_R_S16*)malloc(fft_spec_buffer_size);
- status = omxSP_FFTInit_R_S16(fft_inv_spec, fft_log_size);
- if (status) {
- fprintf(stderr, "Failed to init backward FFT: status = %d\n", status);
- exit(1);
- }
-
- y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size / 2 + 1));
- z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size);
-
- x_true = (float*) malloc(sizeof(*x_true) * fft_size);
- x_scaled = (OMX_S16*) malloc(sizeof(*x_scaled) * fft_size);
- y_true = (struct ComplexFloat*) malloc(sizeof(*y_true) * fft_size);
- y_scaled = y_aligned->aligned_pointer_;
- z = z_aligned->aligned_pointer_;
-
- GenerateSignal(y_true, x_true, fft_size, signal_type);
-
- {
- /*
- * To get max accuracy, scale the input to the inverse FFT up
- * to use as many bits as we can.
- */
- float scale = 1;
- float max = 0;
-
- for (n = 0; n < fft_size / 2 + 1; ++n) {
- float val;
- val = fabs(y_true[n].Re);
- if (val > max) {
- max = val;
- }
- val = fabs(y_true[n].Im);
- if (val > max) {
- max = val;
- }
- }
-
- scale = 16384 / max;
- if (verbose > 63)
- printf("Inverse FFT input scaled factor %g\n", scale);
-
- /*
- * Scale both the true FFT signal and the input so we can
- * compare them correctly later
- */
- for (n = 0; n < fft_size / 2 + 1; ++n) {
- y_scaled[n].Re = (OMX_S16)(0.5 + y_true[n].Re * scale);
- y_scaled[n].Im = (OMX_S16)(0.5 + y_true[n].Im * scale);
- }
- for (n = 0; n < fft_size; ++n) {
- x_scaled[n] = 0.5 + x_true[n] * scale;
- }
- }
-
-
- if (verbose > 63) {
- printf("Inverse FFT Input Signal\n");
- DumpArrayComplex16("y", fft_size / 2 + 1, y_scaled);
-
- printf("Expected Inverse FFT output\n");
- DumpArrayReal16("x", fft_size, x_scaled);
- }
-
- status = omxSP_FFTInv_CCSToR_S16_Sfs((OMX_S32*) y_scaled, z, fft_inv_spec, 0);
- if (status) {
- fprintf(stderr, "Inverse FFT failed: status = %d\n", status);
- exit(1);
- }
-
- if (verbose > 63) {
- printf("Actual Inverse FFT Output\n");
- DumpArrayReal16("z", fft_size, z);
- }
-
- CompareReal16(snr, z, x_scaled, fft_size);
-
- FreeAlignedPointer(y_aligned);
- FreeAlignedPointer(z_aligned);
- free(fft_inv_spec);
-
- return snr->real_snr_;
-}
diff --git a/dl/sp/src/test/test_util.c b/dl/sp/src/test/test_util.c
index 69830b6..88d697b 100644
--- a/dl/sp/src/test/test_util.c
+++ b/dl/sp/src/test/test_util.c
@@ -97,7 +97,7 @@
options->fft_log_size_ = 4;
options->scale_factor_ = 0;
options->signal_type_ = 0;
- options->signal_value_ = 32767;
+ options->signal_value_ = 1024;
options->signal_value_given_ = 0;
}
@@ -382,7 +382,7 @@
const OMX_SC16* array) {
int n;
- printf("%4s\t%10s.re[n]\t%10s.im[n]\n", "n", array_name, array_name);
+ printf("%4s\t%10s.re[n]\t%10s.im[n]\n", "n", array_name, array_name);
for (n = 0; n < count; ++n) {
printf("%4d\t%16d\t%16d\n", n, array[n].Re, array[n].Im);
}