Fix security issues found in the OpenMAX SC16 FFT code: hoist the data and
twiddle-factor loads to the top of their loops so the kernels no longer read
past the end of the input/twiddle buffers on the final iteration, and relax
the documented scaleFactor range for SC16 FFTs from {0} to [0, 16].
Verified with the SNR testing code in the OpenMAX folder.

R=aedla@chromium.org, rtoy@google.com

Review URL: https://webrtc-codereview.appspot.com/1715004

git-svn-id: http://webrtc.googlecode.com/svn/deps/third_party/openmax@4353 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/dl/sp/api/omxSP.h b/dl/sp/api/omxSP.h
index 40abe38..13c64e3 100644
--- a/dl/sp/api/omxSP.h
+++ b/dl/sp/api/omxSP.h
@@ -1887,8 +1887,7 @@
  *          must be aligned on a 32-byte boundary. 
  *   pFFTSpec - pointer to the preallocated and initialized specification 
  *            structure 
- *   scaleFactor - scale factor of the output. Valid value is 0
- *          only.
+ *   scaleFactor - scale factor of the output. Valid range is [0,16].
  *
  * Output Arguments:
  *   order 
diff --git a/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S b/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
index cdb42a9..1b0217e 100644
--- a/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
+++ b/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
@@ -142,7 +142,6 @@
         RSB     setStep,setStep,#16                   @// setStep = - 3*pointStep+16
 
 
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3]
         MOV     subFFTSize,#4                         @// subFFTSize = 1 for the first stage
 
 
@@ -158,6 +157,7 @@
 
 grpZeroSetLoop\name:
 
+        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3]
 
         .ifeqs "\scaled", "TRUE"
 
@@ -178,9 +178,6 @@
             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
             VHADD    qZ0,qY0,qY1             @// y0
 
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
-
-
             .ifeqs  "\inverse", "TRUE"
 
                 VHSUB    dZr3,dYr2,dYi3                  @// y3
@@ -235,9 +232,6 @@
             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
             VADD    qZ0,qY0,qY1             @// y0
 
-            VLD2    {dXr3,dXi3},[pSrc :128],setStep
-
-
             .ifeqs  "\inverse", "TRUE"
 
                 VSUB    dZr3,dYr2,dYi3                  @// y3
diff --git a/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S b/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
index 23e2c37..c5ef7a0 100644
--- a/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
+++ b/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.S
@@ -163,7 +163,6 @@
         @// Define stack arguments
 
         MOV     pw2,pTwiddle
-        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
 
         MOV     pw3,pTwiddle
         MOV     pw1,pTwiddle
@@ -171,43 +170,48 @@
         @// pOut0+outPointStep == increment of 4*outPointStep bytes
         MOV     outPointStep,subFFTSize,LSL #2
 
-        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
         MOV     subFFTNum,#1                            @//after the last stage
         LSL     grpCount,subFFTSize,#2
 
 
         @// Update grpCount and grpSize rightaway
-        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
 
         @// update subFFTSize for the next stage
         MOV     subFFTSize,grpCount
         MOV     dstStep,outPointStep,LSL #1
 
-        VLD2 {dW1r,dW1i}, [pw1 :128]!
-
-
         ADD     dstStep,dstStep,outPointStep                @// dstStep = 3*outPointStep
         RSB     dstStep,dstStep,#16                         @// dstStep = - 3*outPointStep+16
 
-        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
-        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
-
         @// Process 4 groups at a time
 
 grpLoop\name:
+        VLD4     {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
+        VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
+        @// Load the second twiddle for 4 groups : w^2
+        @// w^2 twiddle (2i+0,2i+2,2i+4,2i+6)   for group 0,1,2,3
+        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
+
+        VUZP     dButterfly1Real13, dButterfly2Real13        @// B.r D.r
+
+        @// Load the third twiddle for 4 groups : w^3
+        @// w^3 twiddle (3i+0,3i+3,3i+6,3i+9)   for group 0,1,2,3
+        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
+
+        VUZP     dButterfly1Imag13, dButterfly2Imag13        @// B.i D.i
+        VUZP     dButterfly1Real02, dButterfly2Real02        @// A.r C.r
+
+        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
+
+        VUZP     dButterfly1Imag02, dButterfly2Imag02        @// A.i C.i
+
+        VLD2 {dW1r,dW1i}, [pw1 :128]!
 
         @// Rearrange the third twiddle
         VUZP    dW3r,dW3i
         SUBS    grpCount,grpCount,#16                    @// grpCount is multiplied by 4
 
-
-        VUZP     dButterfly1Real13, dButterfly2Real13        @// B.r D.r
-        VUZP     dButterfly1Imag13, dButterfly2Imag13        @// B.i D.i
-        VUZP     dButterfly1Real02, dButterfly2Real02        @// A.r C.r
-        VUZP     dButterfly1Imag02, dButterfly2Imag02        @// A.i C.i
-
-
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dXr1,dW1r
             VMLAL   qT0,dXi1,dW1i                       @// real part
@@ -225,8 +229,6 @@
         @// Load the first twiddle for 4 groups : w^1
         @// w^1 twiddle (i+0,i+1,i+2,i+3)       for group 0,1,2,3
 
-        VLD2 {dW1r,dW1i}, [pw1 :128]!
-
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT2,dXr2,dW2r
             VMLAL   qT2,dXi2,dW2i                       @// real part
@@ -260,24 +262,12 @@
 
         .ENDIF
 
-        @// Load the second twiddle for 4 groups : w^2
-        @// w^2 twiddle (2i+0,2i+2,2i+4,2i+6)   for group 0,1,2,3
-        VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
-
-
         VRSHRN  dZr2,qT2,#15
         VRSHRN  dZi2,qT3,#15
 
-        @// Load the third twiddle for 4 groups : w^3
-        @// w^3 twiddle (3i+0,3i+3,3i+6,3i+9)   for group 0,1,2,3
-
-        VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
-
         VRSHRN  dZr3,qT0,#15
         VRSHRN  dZi3,qT1,#15
 
-        VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
-
         .ifeqs "\scaled", "TRUE"
 
             @// finish first stage of 4 point FFT
@@ -285,7 +275,6 @@
             VHADD    qY0,qX0,qZ2
             VHSUB    qY2,qX0,qZ2
             VHADD    qY1,qZ1,qZ3
-            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
             VHSUB    qY3,qZ1,qZ3
 
@@ -293,7 +282,6 @@
 
             VHSUB    qZ0,qY2,qY1
             VHADD    qZ2,qY2,qY1
-            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
 
             .ifeqs "\inverse", "TRUE"
@@ -329,7 +317,6 @@
             VADD    qY0,qX0,qZ2
             VSUB    qY2,qX0,qZ2
             VADD    qY1,qZ1,qZ3
-            VLD4    {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
             VSUB    qY3,qZ1,qZ3
 
@@ -337,7 +324,6 @@
 
             VSUB    qZ0,qY2,qY1
             VADD    qZ2,qY2,qY1
-            VLD4     {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
 
 
             .ifeqs "\inverse", "TRUE"
@@ -376,7 +362,6 @@
 
         @// Reset and Swap pSrc and pDst for the next stage
         MOV     pTmp,pDst
-        SUB     pSrc,pSrc,#64                       @// Extra increment currently done in the loop
         SUB     pDst,pSrc,outPointStep,LSL #2       @// pDst -= size; pSrc -= 4*size bytes
         SUB     pSrc,pTmp,outPointStep
 
diff --git a/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S b/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
index 0eba385..0cb4701 100644
--- a/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
+++ b/dl/sp/src/armSP_FFT_CToC_SC16_Radix4_unsafe_s.S
@@ -154,7 +154,6 @@
         MOV     srcStep,pointStep,LSL #1                    @// srcStep = 2*pointStep
         VLD1     dW2,[pTwiddle :64]                             @//[wi | wr]
         ADD     setStep,srcStep,pointStep                   @// setStep = 3*pointStep
-        SUB     srcStep,srcStep,#16                         @// srcStep = 2*pointStep-16
         VLD1     dW3,[pTwiddle :64]
         @//RSB     setStep,setStep,#16                      @// setStep = - 3*pointStep+16
         RSB     setStep,setStep,#0                          @// setStep = - 3*pointStep
@@ -167,26 +166,23 @@
 
 grpLoop\name:
 
-        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
         ADD      stepTwiddle,stepTwiddle,pointStep
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
         ADD      pTwiddle,pTwiddle,stepTwiddle               @// set pTwiddle to the first point
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
         MOV      twStep,stepTwiddle,LSL #2
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & reset pSrc
 
         SUB      twStep,stepTwiddle,twStep                   @// twStep = -3*stepTwiddle
 
 
         MOV      setCount,pointStep,LSR #2
-        ADD     pSrc,pSrc,#16                         @// set pSrc to data[0] of the next set
-        ADD     pSrc,pSrc,pointStep                   @// increment to data[1] of the next set
+        ADD      pSrc,pSrc,pointStep                   @// increment to data[1] of the next set
 
         @// Loop on the sets : 4 at a time
 
 setLoop\name:
+        VLD2    {dXr1,dXi1},[pSrc :128],pointStep         @//  data[1]
+        VLD2    {dXr2,dXi2},[pSrc :128],pointStep         @//  data[2]
 
-        SUBS    setCount,setCount,#4                    @// decrement the loop counter
+        SUBS    setCount,setCount,#4                      @// decrement the loop counter
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dXr1,dW1[0]
@@ -202,8 +198,6 @@
 
         .ENDIF
 
-        VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
-
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT2,dXr2,dW2[0]
             VMLAL   qT2,dXi2,dW2[1]                       @// real part
@@ -218,11 +212,13 @@
 
         .ENDIF
 
+        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
+
         VRSHRN  dZr1,qT0,#15
         VRSHRN  dZi1,qT1,#15
 
-
-        VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
+        VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
+        ADD     pSrc,pSrc,#16                              @// set pSrc to data[1] of the next set
 
         .ifeqs  "\inverse", "TRUE"
             VMULL   qT0,dXr3,dW3[0]
@@ -244,7 +240,6 @@
 
         VRSHRN  dZr3,qT0,#15
         VRSHRN  dZi3,qT1,#15
-        VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
 
 
         .ifeqs "\scaled", "TRUE"
@@ -253,7 +248,6 @@
             VHADD    qY0,qX0,qZ2
             VHSUB    qY2,qX0,qZ2
 
-            VLD2    {dXr0,dXi0},[pSrc :128]!          @//  data[0]
             VHADD    qY1,qZ1,qZ3
             VHSUB    qY3,qZ1,qZ3
 
@@ -303,7 +297,6 @@
             VADD    qY0,qX0,qZ2
             VSUB    qY2,qX0,qZ2
 
-            VLD2    {dXr0,dXi0},[pSrc]!          @//  data[0]
             VADD    qY1,qZ1,qZ3
             VSUB    qY3,qZ1,qZ3
 
@@ -351,7 +344,6 @@
 
         .ENDIF
 
-        ADD     pSrc,pSrc,pointStep                         @// increment to data[1] of the next set
         BGT     setLoop\name
 
         VLD1     dW1,[pTwiddle :64],stepTwiddle                 @//[wi | wr]
diff --git a/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S b/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
index 588c319..ba7c344 100644
--- a/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
+++ b/dl/sp/src/armSP_FFT_CToC_SC16_Radix8_fs_unsafe_s.S
@@ -233,12 +233,12 @@
         VLD2    {dXr4,dXi4},[pSrc :128],pointStep          @//  data[4]
         VLD2    {dXr5,dXi5},[pSrc :128],pointStep          @//  data[5]
         VLD2    {dXr6,dXi6},[pSrc :128],pointStep          @//  data[6]
-        VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7] & update pSrc for the next set
-                                                      @//  setStep = -7*pointStep + 16
         @// grp = 0 a special case since all the twiddle factors are 1
         @// Loop on the sets : 4 sets at a time
 
 grpZeroSetLoop\name:
+        VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7] & update pSrc for the next set
+                                                           @//  setStep = -7*pointStep + 16
 
         @// Decrement setcount
         SUBS    setCount,setCount,#4                    @// decrement the set loop counter
@@ -348,9 +348,6 @@
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
-
-
                 VHSUB    dYr3,dVr3,dVr7
                 VHSUB    dYi3,dVi3,dVi7
                 VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
@@ -388,7 +385,6 @@
 
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
 
                 VHSUB    qY5,qV1,qV5
 
@@ -514,9 +510,6 @@
                 VSUB    dVi7,dVi7,dT1
                 SUB     pDst, pDst, step2                           @// set pDst to y1
 
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
-
-
                 VSUB    dYr3,dVr3,dVr7
                 VSUB    dYi3,dVi3,dVi7
                 VST2    {dYr1,dYi1},[pDst :128],step1                    @// store y1
@@ -554,7 +547,6 @@
 
                 VSUB    dVr5,dT1,dVi5                               @// a * V5
                 VADD    dVi5,dT1,dVi5
-                VLD2    {dXr7,dXi7},[pSrc :128],setStep            @//  data[7]
 
                 VSUB    qY5,qV1,qV5