blob: eec05e9a9b5a53740967d02f701531f44ec40272 [file] [log] [blame]
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
extern void armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
extern void armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
extern void armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
extern void armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
extern void armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
extern void armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
extern void armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/*
* Scale FFT data by 1/|length|. |length| must be a power of two
*/
static inline ScaleFFTData(OMX_FC32* fftData, unsigned length) {
float32_t* data = (float32_t*)fftData;
float32_t scale = 1.0f / length;
/*
* Do two complex elements at a time because |length| is always
* greater than or equal to 2 (order >= 1)
*/
do {
float32x4_t x = vld1q_f32(data);
length -= 2;
x = vmulq_n_f32(x, scale);
vst1q_f32(data, x);
data += 4;
} while (length > 0);
}
/**
* Function: omxSP_FFTInv_CToC_FC32
*
* Description:
* These functions compute an inverse FFT for a complex signal of
* length of 2^order, where 0 <= order <= 15. Transform length is
* determined by the specification structure, which must be
* initialized prior to calling the FFT function using the appropriate
* helper, i.e., <FFTInit_C_FC32>. The relationship between the input
* and output sequences can be expressed in terms of the IDFT, i.e.:
*
* x[n] = SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
* n=0,1,2,...N-1
* N=2^order.
*
* Input Arguments:
* pSrc - pointer to the complex-valued input signal, of length 2^order ;
* must be aligned on a 32-byte boundary.
* pFFTSpec - pointer to the preallocated and initialized specification
* structure
*
* Output Arguments:
* order
* pDst - pointer to the complex-valued output signal, of length 2^order;
* must be aligned on a 32-byte boundary.
*
* Return Value:
*
* OMX_Sts_NoErr - no error
* OMX_Sts_BadArgErr - returned if one or more of the following conditions
* is true:
* - one or more of the following pointers is NULL: pSrc, pDst, or
* pFFTSpec.
* - pSrc or pDst is not 32-byte aligned
*
*/
OMXResult omxSP_FFTInv_CToC_FC32_Sfs(const OMX_FC32* pSrc,
OMX_FC32* pDst,
const OMXFFTSpec_C_FC32* pFFTSpec) {
ARMsFFTSpec_FC32* spec = (ARMsFFTSpec_FC32*)pFFTSpec;
int order;
long subFFTSize;
long subFFTNum;
OMX_FC32* pTwiddle;
OMX_FC32* pOut;
/*
* Check args are not NULL and the source and destination pointers
* are properly aligned.
*/
if (!validateParametersFC32(pSrc, pDst, spec))
return OMX_Sts_BadArgErr;
order = fastlog2(spec->N);
subFFTSize = 1;
subFFTNum = spec->N;
pTwiddle = spec->pTwiddle;
pOut = spec->pBuf;
if (order > 3) {
OMX_FC32* argDst;
/*
* Set up argDst and pOut appropriately so that pOut = pDst for
* the very last FFT stage.
*/
if ((order & 2) == 0) {
argDst = pOut;
pOut = pDst;
} else {
argDst = pDst;
}
/*
* Odd order uses a radix 8 first stage; even order, a radix 4
* first stage.
*/
if (order & 1) {
armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
} else {
armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
pSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
}
/*
* Now use radix 4 stages to finish rest of the FFT
*/
if (subFFTNum >= 4) {
while (subFFTNum > 4) {
OMX_FC32* tmp;
armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
/*
* Swap argDst and pOut
*/
tmp = pOut;
pOut = argDst;
argDst = tmp;
}
armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
}
} else if (order == 3) {
armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize);
armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
pDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize);
} else if (order == 2) {
armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
pSrc, pOut, pTwiddle, &subFFTNum, &subFFTSize);
armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
pOut, pDst, pTwiddle, &subFFTNum, &subFFTSize);
} else {
/* Order = 1 */
armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
pSrc, pDst, pTwiddle, &subFFTNum, &subFFTSize);
}
ScaleFFTData(pDst, spec->N);
return OMX_Sts_NoErr;
}