blob: 771c6714aa08d0af4c05678757421814f691f7e3 [file] [log] [blame]
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "dl/api/omxtypes.h"
#include "dl/sp/api/armSP.h"
#include "dl/sp/api/omxSP.h"
/*
* Stage kernels for the inverse complex-to-complex FFT. They are only
* declared here; the definitions live outside this file (presumably in
* hand-written NEON assembly -- TODO confirm).
*
* All of them share one signature:
*   pSrc       - input of the stage (const, so presumably only read).
*   pDst       - output of the stage; the "OutOfPlace" suffix suggests it
*                must be a different buffer from pSrc.
*   pTwiddle   - twiddle table; this file always passes spec->pTwiddle.
*   subFFTNum,
*   subFFTSize - stage state passed by pointer. The radix-4 loop in
*                omxSP_FFTInv_CCSToR_F32_Sfs only terminates if the kernel
*                updates *subFFTNum, so these are in/out values. Their
*                exact meaning is not visible from this file.
*
* In this file the "_fs" kernels are always called as the first stage and
* the "_ls" kernels as the last stage; the unsuffixed kernels run the
* stages in between.
*/
/* Radix-2 first stage (used for complex orders 1, 2 and 3). */
extern void armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/* Radix-2 last stage (used for complex orders 2 and 3). */
extern void armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/* Radix-2 middle stage (used for complex order 3 only). */
extern void armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/* Radix-4 first stage (used for even complex orders greater than 3). */
extern void armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/* Radix-4 middle stage (repeated while subFFTNum > 4). */
extern void armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/* Radix-4 last stage (finishes every complex order greater than 3). */
extern void armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/* Radix-8 first stage (used for odd complex orders greater than 3). */
extern void armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
const OMX_FC32* pSrc,
OMX_FC32* pDst,
OMX_FC32* pTwiddle,
long* subFFTNum,
long* subFFTSize);
/*
* Preprocessing step that turns the packed CCS input into the input of the
* half-size complex inverse FFT. It is called with the CCS source, the
* spec's twiddle table, the spec's temp buffer and the real FFT length N.
* According to the caller's comments the result is stored in the second
* half of pBuf.
*/
extern void armSP_FFTInv_CCSToR_F32_preTwiddleRadix2(
const OMX_F32* pSrc,
const OMX_FC32* pTwiddle,
OMX_F32* pBuf,
long N);
/*
 * Scale the first |length| floats of |fftData| in place by 2/|length|.
 *
 * Note that the factor is 2/|length|, not 1/|length|. The caller in this
 * file passes the real FFT length N after running a complex inverse FFT
 * of half that size, and 2/N is the 1/(N/2) normalization of that
 * half-size transform.
 *
 * |length| is expected to be a power of two. Other values are handled
 * safely anyway: a length of zero is a no-op, and lengths that are not a
 * multiple of four are finished with a 2-lane and/or scalar tail instead
 * of wrapping the unsigned counter and running past the end of the
 * buffer.
 *
 * fftData - data to scale; must hold at least |length| floats.
 * length  - number of floats to scale.
 */
static inline void ScaleRFFTData(OMX_F32* fftData, unsigned length) {
  float32_t* data = (float32_t*)fftData;
  float32_t scale;

  /* Nothing to scale; this also avoids the division by zero below. */
  if (length == 0) {
    return;
  }

  scale = 2.0f / length;

  /*
   * Bulk of the data: 4 float elements at a time. For the expected
   * power-of-two lengths >= 4 this loop consumes everything.
   *
   * TODO(rtoy): Figure out how to process 8 elements at a time
   * using intrinsics or replace this with inline assembly.
   */
  while (length >= 4) {
    float32x4_t x = vld1q_f32(data);
    x = vmulq_n_f32(x, scale);
    vst1q_f32(data, x);
    data += 4;
    length -= 4;
  }

  /* Two remaining elements (this is the whole job when length == 2). */
  if (length >= 2) {
    float32x2_t x = vld1_f32(data);
    x = vmul_n_f32(x, scale);
    vst1_f32(data, x);
    data += 2;
    length -= 2;
  }

  /* One remaining element (this is the whole job when length == 1). */
  if (length != 0) {
    *data *= scale;
  }
}
/**
* Function: omxSP_FFTInv_CCSToR_F32_Sfs
*
* Description:
* These functions compute the inverse FFT for a conjugate-symmetric input
* sequence. Transform length is determined by the specification structure,
* which must be initialized prior to calling the FFT function using
* <FFTInit_R_F32>. For a transform of length M, the input sequence is
* represented using a packed CCS vector of length M+2, and is organized
* as follows:
*
* Index: 0 1 2 3 4 5 . . . M-2 M-1 M M+1
* Comp: R[0] 0 R[1] I[1] R[2] I[2] . . . R[M/2-1] I[M/2-1] R[M/2] 0
*
* where R[n] and I[n], respectively, denote the real and imaginary
* components for FFT bin n. Bins are numbered from 0 to M/2, where M
* is the FFT length. Bin index 0 corresponds to the DC component,
* and bin index M/2 corresponds to the foldover frequency.
*
* Implementation outline:
* 1. The CCS input is preprocessed into the temp buffer of the spec
*    (armSP_FFTInv_CCSToR_F32_preTwiddleRadix2).
* 2. A complex inverse FFT of half the real length is run as a chain of
*    out-of-place stages that alternate between the temp buffer and pDst,
*    arranged so that the last stage writes to pDst.
* 3. The M output values in pDst are multiplied by 2/M (ScaleRFFTData).
*
* The temp buffer inside the spec is written on every call.
*
* Input Arguments:
* pSrc - pointer to the complex-valued input sequence represented
* using CCS format, of length (2^order) + 2; must be aligned on a
* 32-byte boundary.
* pFFTSpec - pointer to the preallocated and initialized
* specification structure
*
* Output Arguments:
* pDst - pointer to the real-valued output sequence, of length
* 2^order ; must be aligned on a 32-byte boundary.
*
* Return Value:
*
* OMX_Sts_NoErr - no error
* OMX_Sts_BadArgErr - bad arguments, as reported by
* validateParametersF32 (defined outside this file), if one or more
* of the following is true:
* - pSrc, pDst, or pFFTSpec is NULL
* - pSrc or pDst is not aligned on a 32-byte boundary
*
*/
OMXResult omxSP_FFTInv_CCSToR_F32_Sfs(
const OMX_F32* pSrc,
OMX_F32* pDst,
const OMXFFTSpec_R_F32* pFFTSpec) {
/* The spec is read through the ARMsFFTSpec_R_FC32 layout (N, pTwiddle, pBuf). */
ARMsFFTSpec_R_FC32* spec = (ARMsFFTSpec_R_FC32*)pFFTSpec;
/* fastlog2(N) - 1, i.e. the order of the half-size complex FFT. */
int order;
/* Stage state handed by pointer to every kernel; the kernels update it. */
long subFFTSize;
long subFFTNum;
OMX_FC32* pTwiddle;
/* Output buffer of the next stage (initially the temp buffer). */
OMX_FC32* pOut;
/* Preprocessed input of the complex FFT, inside the temp buffer. */
OMX_FC32* pComplexSrc;
/* pDst viewed as complex values; the FFT stages work on OMX_FC32. */
OMX_FC32* pComplexDst = (OMX_FC32*) pDst;
/*
* Check args are not NULL and the source and destination pointers
* are properly aligned.
*/
if (!validateParametersF32(pSrc, pDst, spec))
return OMX_Sts_BadArgErr;
/*
* Preprocess the input before calling the complex inverse FFT. The
* result is actually stored in the second half of the temp buffer
* in pFFTSpec.
*/
if (spec->N > 1)
armSP_FFTInv_CCSToR_F32_preTwiddleRadix2(
pSrc, spec->pTwiddle, spec->pBuf, spec->N);
/*
* Do a complex inverse FFT of half size.
*/
order = fastlog2(spec->N) - 1;
subFFTSize = 1;
subFFTNum = spec->N >> 1;
pTwiddle = spec->pTwiddle;
/*
* The pBuf is split in half. The first half is the temp buffer. The
* second half holds the source data that was placed there by
* armSP_FFTInv_CCSToR_F32_preTwiddleRadix2.
*
* NOTE(review): if spec->N could ever be 1, order would be negative
* (assuming fastlog2(1) == 0) and the shift below would be undefined.
* Confirm that the spec initialization never produces N == 1.
*/
pOut = (OMX_FC32*) spec->pBuf;
pComplexSrc = pOut + (1 << order);
if (order > 3) {
/* Destination of the first stage; afterwards the input of the next one. */
OMX_FC32* argDst;
/*
* Set up argDst and pOut appropriately so that pOut = pDst for
* the very last FFT stage.
*
* The stages ping-pong between the temp buffer and pDst. Assuming
* each radix-r stage divides subFFTNum by r, floor(order / 2) stages
* run in total. That count is even exactly when bit 1 of order is
* clear; in that case the first stage must write to the temp buffer
* so that the last one lands in pDst. Otherwise the first stage
* writes to pDst directly.
*/
if ((order & 2) == 0) {
argDst = pOut;
pOut = pComplexDst;
} else {
argDst = pComplexDst;
}
/*
* Odd order uses a radix 8 first stage; even order, a radix 4
* first stage.
*/
if (order & 1) {
armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace(
pComplexSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
} else {
armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace(
pComplexSrc, argDst, pTwiddle, &subFFTNum, &subFFTSize);
}
/*
* Now use radix 4 stages to finish rest of the FFT. subFFTNum was
* updated by the first stage; the loop relies on every radix 4 call
* updating it again.
*/
if (subFFTNum >= 4) {
while (subFFTNum > 4) {
OMX_FC32* tmp;
armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace(
argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
/*
* Swap argDst and pOut so that the output of this stage becomes
* the input of the next one.
*/
tmp = pOut;
pOut = argDst;
argDst = tmp;
}
/* Last stage: by construction pOut is pComplexDst at this point. */
armSP_FFTInv_CToC_FC32_Radix4_ls_OutOfPlace(
argDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
}
} else if (order == 3) {
/* Three radix 2 stages: src -> pDst -> temp buffer -> pDst. */
armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
pComplexSrc, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize);
armSP_FFTInv_CToC_FC32_Radix2_OutOfPlace(
pComplexDst, pOut, pTwiddle, &subFFTNum, &subFFTSize);
armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
pOut, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize);
} else if (order == 2) {
/* Two radix 2 stages: src -> temp buffer -> pDst. */
armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
pComplexSrc, pOut, pTwiddle, &subFFTNum, &subFFTSize);
armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace(
pOut, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize);
} else if (order == 1) {
/* A single radix 2 stage straight into pDst. */
armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace(
pComplexSrc, pComplexDst, pTwiddle, &subFFTNum, &subFFTSize);
} else {
/*
* order < 1. For N == 2 the complex order is 0, so the complex FFT
* is the identity: copy the single preprocessed complex value (two
* floats) to the output.
*/
*pComplexDst = *pComplexSrc;
}
/* Normalize: multiplies each of the N output values by 2/N. */
ScaleRFFTData(pDst, spec->N);
return OMX_Sts_NoErr;
}