| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | /* |
| 12 | * fft.c |
| 13 | * |
| 14 | * Fast Fourier Transform |
| 15 | * |
| 16 | */ |
| 17 | |
| 18 | |
| Mirko Bonadei | 08973eed | 2018-02-01 13:12:55 | [diff] [blame] | 19 | #include "modules/audio_coding/codecs/isac/fix/source/fft.h" |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 20 | |
| kwiberg | a6ca518 | 2017-01-30 13:28:54 | [diff] [blame] | 21 | static const int16_t kSortTabFft[240] = { |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 22 | 0, 60, 120, 180, 20, 80, 140, 200, 40, 100, 160, 220, |
| 23 | 4, 64, 124, 184, 24, 84, 144, 204, 44, 104, 164, 224, |
| 24 | 8, 68, 128, 188, 28, 88, 148, 208, 48, 108, 168, 228, |
| 25 | 12, 72, 132, 192, 32, 92, 152, 212, 52, 112, 172, 232, |
| 26 | 16, 76, 136, 196, 36, 96, 156, 216, 56, 116, 176, 236, |
| 27 | 1, 61, 121, 181, 21, 81, 141, 201, 41, 101, 161, 221, |
| 28 | 5, 65, 125, 185, 25, 85, 145, 205, 45, 105, 165, 225, |
| 29 | 9, 69, 129, 189, 29, 89, 149, 209, 49, 109, 169, 229, |
| 30 | 13, 73, 133, 193, 33, 93, 153, 213, 53, 113, 173, 233, |
| 31 | 17, 77, 137, 197, 37, 97, 157, 217, 57, 117, 177, 237, |
| 32 | 2, 62, 122, 182, 22, 82, 142, 202, 42, 102, 162, 222, |
| 33 | 6, 66, 126, 186, 26, 86, 146, 206, 46, 106, 166, 226, |
| 34 | 10, 70, 130, 190, 30, 90, 150, 210, 50, 110, 170, 230, |
| 35 | 14, 74, 134, 194, 34, 94, 154, 214, 54, 114, 174, 234, |
| 36 | 18, 78, 138, 198, 38, 98, 158, 218, 58, 118, 178, 238, |
| 37 | 3, 63, 123, 183, 23, 83, 143, 203, 43, 103, 163, 223, |
| 38 | 7, 67, 127, 187, 27, 87, 147, 207, 47, 107, 167, 227, |
| 39 | 11, 71, 131, 191, 31, 91, 151, 211, 51, 111, 171, 231, |
| 40 | 15, 75, 135, 195, 35, 95, 155, 215, 55, 115, 175, 235, |
| 41 | 19, 79, 139, 199, 39, 99, 159, 219, 59, 119, 179, 239 |
| 42 | }; |
| 43 | |
| 44 | /* Cosine table in Q14 */ |
| kwiberg | a6ca518 | 2017-01-30 13:28:54 | [diff] [blame] | 45 | static const int16_t kCosTabFfftQ14[240] = { |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 46 | 16384, 16378, 16362, 16333, 16294, 16244, 16182, 16110, 16026, 15931, 15826, 15709, |
| 47 | 15582, 15444, 15296, 15137, 14968, 14788, 14598, 14399, 14189, 13970, 13741, 13502, |
| 48 | 13255, 12998, 12733, 12458, 12176, 11885, 11585, 11278, 10963, 10641, 10311, 9974, |
| 49 | 9630, 9280, 8923, 8561, 8192, 7818, 7438, 7053, 6664, 6270, 5872, 5469, |
| 50 | 5063, 4653, 4240, 3825, 3406, 2986, 2563, 2139, 1713, 1285, 857, 429, |
| 51 | 0, -429, -857, -1285, -1713, -2139, -2563, -2986, -3406, -3825, -4240, -4653, |
| 52 | -5063, -5469, -5872, -6270, -6664, -7053, -7438, -7818, -8192, -8561, -8923, -9280, |
| 53 | -9630, -9974, -10311, -10641, -10963, -11278, -11585, -11885, -12176, -12458, -12733, -12998, |
| 54 | -13255, -13502, -13741, -13970, -14189, -14399, -14598, -14788, -14968, -15137, -15296, -15444, |
| 55 | -15582, -15709, -15826, -15931, -16026, -16110, -16182, -16244, -16294, -16333, -16362, -16378, |
| 56 | -16384, -16378, -16362, -16333, -16294, -16244, -16182, -16110, -16026, -15931, -15826, -15709, |
| 57 | -15582, -15444, -15296, -15137, -14968, -14788, -14598, -14399, -14189, -13970, -13741, -13502, |
| 58 | -13255, -12998, -12733, -12458, -12176, -11885, -11585, -11278, -10963, -10641, -10311, -9974, |
| 59 | -9630, -9280, -8923, -8561, -8192, -7818, -7438, -7053, -6664, -6270, -5872, -5469, |
| 60 | -5063, -4653, -4240, -3825, -3406, -2986, -2563, -2139, -1713, -1285, -857, -429, |
| 61 | 0, 429, 857, 1285, 1713, 2139, 2563, 2986, 3406, 3825, 4240, 4653, |
| 62 | 5063, 5469, 5872, 6270, 6664, 7053, 7438, 7818, 8192, 8561, 8923, 9280, |
| 63 | 9630, 9974, 10311, 10641, 10963, 11278, 11585, 11885, 12176, 12458, 12733, 12998, |
| 64 | 13255, 13502, 13741, 13970, 14189, 14399, 14598, 14788, 14968, 15137, 15296, 15444, |
| 65 | 15582, 15709, 15826, 15931, 16026, 16110, 16182, 16244, 16294, 16333, 16362, 16378 |
| 66 | }; |
| 67 | |
| 68 | |
| 69 | |
| 70 | /* Uses 16x16 mul, without rounding, which is faster. Uses WEBRTC_SPL_MUL_16_16_RSFT */ |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 71 | int16_t WebRtcIsacfix_FftRadix16Fastest(int16_t RexQx[], int16_t ImxQx[], int16_t iSign) { |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 72 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 73 | int16_t dd, ee, ff, gg, hh, ii; |
| 74 | int16_t k0, k1, k2, k3, k4, kk; |
| 75 | int16_t tmp116, tmp216; |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 76 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 77 | int16_t ccc1Q14, ccc2Q14, ccc3Q14, sss1Q14, sss2Q14, sss3Q14; |
| 78 | int16_t sss60Q14, ccc72Q14, sss72Q14; |
| 79 | int16_t aaQx, ajQx, akQx, ajmQx, ajpQx, akmQx, akpQx; |
| 80 | int16_t bbQx, bjQx, bkQx, bjmQx, bjpQx, bkmQx, bkpQx; |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 81 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 82 | int16_t ReDATAQx[240], ImDATAQx[240]; |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 83 | |
| 84 | sss60Q14 = kCosTabFfftQ14[20]; |
| 85 | ccc72Q14 = kCosTabFfftQ14[48]; |
| 86 | sss72Q14 = kCosTabFfftQ14[12]; |
| 87 | |
| 88 | if (iSign < 0) { |
| 89 | sss72Q14 = -sss72Q14; |
| 90 | sss60Q14 = -sss60Q14; |
| 91 | } |
| 92 | /* Complexity is: 10 cycles */ |
| 93 | |
| 94 | /* compute fourier transform */ |
| 95 | |
| 96 | // transform for factor of 4 |
| 97 | for (kk=0; kk<60; kk++) { |
| 98 | k0 = kk; |
| 99 | k1 = k0 + 60; |
| 100 | k2 = k1 + 60; |
| 101 | k3 = k2 + 60; |
| 102 | |
| 103 | akpQx = RexQx[k0] + RexQx[k2]; |
| 104 | akmQx = RexQx[k0] - RexQx[k2]; |
| 105 | ajpQx = RexQx[k1] + RexQx[k3]; |
| 106 | ajmQx = RexQx[k1] - RexQx[k3]; |
| 107 | bkpQx = ImxQx[k0] + ImxQx[k2]; |
| 108 | bkmQx = ImxQx[k0] - ImxQx[k2]; |
| 109 | bjpQx = ImxQx[k1] + ImxQx[k3]; |
| 110 | bjmQx = ImxQx[k1] - ImxQx[k3]; |
| 111 | |
| 112 | RexQx[k0] = akpQx + ajpQx; |
| 113 | ImxQx[k0] = bkpQx + bjpQx; |
| 114 | ajpQx = akpQx - ajpQx; |
| 115 | bjpQx = bkpQx - bjpQx; |
| 116 | if (iSign < 0) { |
| 117 | akpQx = akmQx + bjmQx; |
| 118 | bkpQx = bkmQx - ajmQx; |
| 119 | akmQx -= bjmQx; |
| 120 | bkmQx += ajmQx; |
| 121 | } else { |
| 122 | akpQx = akmQx - bjmQx; |
| 123 | bkpQx = bkmQx + ajmQx; |
| 124 | akmQx += bjmQx; |
| 125 | bkmQx -= ajmQx; |
| 126 | } |
| 127 | |
| 128 | ccc1Q14 = kCosTabFfftQ14[kk]; |
| bjornv@webrtc.org | bc2bb34 | 2015-03-13 12:58:03 | [diff] [blame] | 129 | ccc2Q14 = kCosTabFfftQ14[2 * kk]; |
| 130 | ccc3Q14 = kCosTabFfftQ14[3 * kk]; |
| 131 | sss1Q14 = kCosTabFfftQ14[kk + 60]; |
| 132 | sss2Q14 = kCosTabFfftQ14[2 * kk + 60]; |
| 133 | sss3Q14 = kCosTabFfftQ14[3 * kk + 60]; |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 134 | if (iSign==1) { |
| 135 | sss1Q14 = -sss1Q14; |
| 136 | sss2Q14 = -sss2Q14; |
| 137 | sss3Q14 = -sss3Q14; |
| 138 | } |
| 139 | |
| 140 | //Do several multiplications like Q14*Q16>>14 = Q16 |
| 141 | // RexQ16[k1] = akpQ16 * ccc1Q14 - bkpQ16 * sss1Q14; |
| 142 | // RexQ16[k2] = ajpQ16 * ccc2Q14 - bjpQ16 * sss2Q14; |
| 143 | // RexQ16[k3] = akmQ16 * ccc3Q14 - bkmQ16 * sss3Q14; |
| 144 | // ImxQ16[k1] = akpQ16 * sss1Q14 + bkpQ16 * ccc1Q14; |
| 145 | // ImxQ16[k2] = ajpQ16 * sss2Q14 + bjpQ16 * ccc2Q14; |
| 146 | // ImxQ16[k3] = akmQ16 * sss3Q14 + bkmQ16 * ccc3Q14; |
| 147 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 148 | RexQx[k1] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc1Q14, akpQx, 14) - |
| 149 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss1Q14, bkpQx, 14); // 6 non-mul + 2 mul cycles, i.e. 8 cycles (6+2*7=20 cycles if 16x32mul) |
| 150 | RexQx[k2] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, ajpQx, 14) - |
| 151 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bjpQx, 14); |
| 152 | RexQx[k3] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc3Q14, akmQx, 14) - |
| 153 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss3Q14, bkmQx, 14); |
| 154 | ImxQx[k1] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss1Q14, akpQx, 14) + |
| 155 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc1Q14, bkpQx, 14); |
| 156 | ImxQx[k2] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, ajpQx, 14) + |
| 157 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bjpQx, 14); |
| 158 | ImxQx[k3] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss3Q14, akmQx, 14) + |
| 159 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc3Q14, bkmQx, 14); |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 160 | //This mul segment needs 6*8 = 48 cycles for 16x16 muls, but 6*20 = 120 cycles for 16x32 muls |
| 161 | |
| 162 | |
| 163 | } |
| 164 | /* Complexity is: 51+48 = 99 cycles for 16x16 muls, but 51+120 = 171 cycles for 16x32 muls*/ |
| 165 | |
| 166 | // transform for factor of 3 |
| 167 | kk=0; |
| 168 | k1=20; |
| 169 | k2=40; |
| 170 | |
| 171 | for (hh=0; hh<4; hh++) { |
| 172 | for (ii=0; ii<20; ii++) { |
| 173 | akQx = RexQx[kk]; |
| 174 | bkQx = ImxQx[kk]; |
| 175 | ajQx = RexQx[k1] + RexQx[k2]; |
| 176 | bjQx = ImxQx[k1] + ImxQx[k2]; |
| 177 | RexQx[kk] = akQx + ajQx; |
| 178 | ImxQx[kk] = bkQx + bjQx; |
| henrik.lundin@webrtc.org | 2a26734 | 2015-01-12 20:52:21 | [diff] [blame] | 179 | tmp116 = ajQx >> 1; |
| 180 | tmp216 = bjQx >> 1; |
| 181 | akQx = akQx - tmp116; |
| 182 | bkQx = bkQx - tmp216; |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 183 | tmp116 = RexQx[k1] - RexQx[k2]; |
| 184 | tmp216 = ImxQx[k1] - ImxQx[k2]; |
| 185 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 186 | ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss60Q14, tmp116, 14); // Q14*Qx>>14 = Qx |
| 187 | bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss60Q14, tmp216, 14); // Q14*Qx>>14 = Qx |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 188 | RexQx[k1] = akQx - bjQx; |
| 189 | RexQx[k2] = akQx + bjQx; |
| 190 | ImxQx[k1] = bkQx + ajQx; |
| 191 | ImxQx[k2] = bkQx - ajQx; |
| 192 | |
| 193 | kk++; |
| 194 | k1++; |
| 195 | k2++; |
| 196 | } |
| 197 | /* Complexity : (31+6)*20 = 740 cycles for 16x16 muls, but (31+18)*20 = 980 cycles for 16x32 muls*/ |
| 198 | kk=kk+40; |
| 199 | k1=k1+40; |
| 200 | k2=k2+40; |
| 201 | } |
| 202 | /* Complexity : 4*(740+3) = 2972 cycles for 16x16 muls, but 4*(980+3) = 3932 cycles for 16x32 muls*/ |
| 203 | |
| 204 | /* multiply by rotation factor for odd factor 3 or 5 (not for 4) |
| 205 | Same code (duplicated) for both ii=2 and ii=3 */ |
| 206 | kk = 1; |
| 207 | ee = 0; |
| 208 | ff = 0; |
| 209 | |
| 210 | for (gg=0; gg<19; gg++) { |
| 211 | kk += 20; |
| 212 | ff = ff+4; |
| 213 | for (hh=0; hh<2; hh++) { |
| bjornv@webrtc.org | bc2bb34 | 2015-03-13 12:58:03 | [diff] [blame] | 214 | ee = ff + hh * ff; |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 215 | dd = ee + 60; |
| 216 | ccc2Q14 = kCosTabFfftQ14[ee]; |
| 217 | sss2Q14 = kCosTabFfftQ14[dd]; |
| 218 | if (iSign==1) { |
| 219 | sss2Q14 = -sss2Q14; |
| 220 | } |
| 221 | for (ii=0; ii<4; ii++) { |
| 222 | akQx = RexQx[kk]; |
| 223 | bkQx = ImxQx[kk]; |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 224 | RexQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akQx, 14) - // Q14*Qx>>14 = Qx |
| 225 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkQx, 14); |
| 226 | ImxQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akQx, 14) + // Q14*Qx>>14 = Qx |
| 227 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkQx, 14); |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 228 | |
| 229 | |
| 230 | kk += 60; |
| 231 | } |
| 232 | kk = kk - 220; |
| 233 | } |
| 234 | // Complexity: 2*(13+5+4*13+2) = 144 for 16x16 muls, but 2*(13+5+4*33+2) = 304 cycles for 16x32 muls |
| 235 | kk = kk - 59; |
| 236 | } |
| 237 | // Complexity: 19*144 = 2736 for 16x16 muls, but 19*304 = 5776 cycles for 16x32 muls |
| 238 | |
| 239 | // transform for factor of 5 |
| 240 | kk = 0; |
| 241 | ccc2Q14 = kCosTabFfftQ14[96]; |
| 242 | sss2Q14 = kCosTabFfftQ14[84]; |
| 243 | if (iSign==1) { |
| 244 | sss2Q14 = -sss2Q14; |
| 245 | } |
| 246 | |
| 247 | for (hh=0; hh<4; hh++) { |
| 248 | for (ii=0; ii<12; ii++) { |
| 249 | k1 = kk + 4; |
| 250 | k2 = k1 + 4; |
| 251 | k3 = k2 + 4; |
| 252 | k4 = k3 + 4; |
| 253 | |
| 254 | akpQx = RexQx[k1] + RexQx[k4]; |
| 255 | akmQx = RexQx[k1] - RexQx[k4]; |
| 256 | bkpQx = ImxQx[k1] + ImxQx[k4]; |
| 257 | bkmQx = ImxQx[k1] - ImxQx[k4]; |
| 258 | ajpQx = RexQx[k2] + RexQx[k3]; |
| 259 | ajmQx = RexQx[k2] - RexQx[k3]; |
| 260 | bjpQx = ImxQx[k2] + ImxQx[k3]; |
| 261 | bjmQx = ImxQx[k2] - ImxQx[k3]; |
| 262 | aaQx = RexQx[kk]; |
| 263 | bbQx = ImxQx[kk]; |
| 264 | RexQx[kk] = aaQx + akpQx + ajpQx; |
| 265 | ImxQx[kk] = bbQx + bkpQx + bjpQx; |
| 266 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 267 | akQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, akpQx, 14) + |
| 268 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, ajpQx, 14) + aaQx; |
| 269 | bkQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, bkpQx, 14) + |
| 270 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bjpQx, 14) + bbQx; |
| 271 | ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, akmQx, 14) + |
| 272 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, ajmQx, 14); |
| 273 | bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, bkmQx, 14) + |
| 274 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bjmQx, 14); |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 275 | // 32+4*8=64 or 32+4*20=112 |
| 276 | |
| 277 | RexQx[k1] = akQx - bjQx; |
| 278 | RexQx[k4] = akQx + bjQx; |
| 279 | ImxQx[k1] = bkQx + ajQx; |
| 280 | ImxQx[k4] = bkQx - ajQx; |
| 281 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 282 | akQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akpQx, 14) + |
| 283 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, ajpQx, 14) + aaQx; |
| 284 | bkQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkpQx, 14) + |
| 285 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, bjpQx, 14) + bbQx; |
| 286 | ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akmQx, 14) - |
| 287 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, ajmQx, 14); |
| 288 | bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkmQx, 14) - |
| 289 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, bjmQx, 14); |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 290 | // 8+4*8=40 or 8+4*20=88 |
| 291 | |
| 292 | RexQx[k2] = akQx - bjQx; |
| 293 | RexQx[k3] = akQx + bjQx; |
| 294 | ImxQx[k2] = bkQx + ajQx; |
| 295 | ImxQx[k3] = bkQx - ajQx; |
| 296 | |
| 297 | kk = k4 + 4; |
| 298 | } |
| 299 | // Complexity: 12*(64+40+10) = 1368 for 16x16 muls, but 12*(112+88+10) = 2520 cycles for 16x32 muls |
| 300 | kk -= 239; |
| 301 | } |
| 302 | // Complexity: 4*1368 = 5472 for 16x16 muls, but 4*2520 = 10080 cycles for 16x32 muls |
| 303 | |
| 304 | /* multiply by rotation factor for odd factor 3 or 5 (not for 4) |
| 305 | Same code (duplicated) for both ii=2 and ii=3 */ |
| 306 | kk = 1; |
| 307 | ee=0; |
| 308 | |
| 309 | for (gg=0; gg<3; gg++) { |
| 310 | kk += 4; |
| bjornv@webrtc.org | bc2bb34 | 2015-03-13 12:58:03 | [diff] [blame] | 311 | dd = 12 + 12 * gg; |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 312 | ff = 0; |
| 313 | for (hh=0; hh<4; hh++) { |
| 314 | ff = ff+dd; |
| 315 | ee = ff+60; |
| 316 | for (ii=0; ii<12; ii++) { |
| 317 | akQx = RexQx[kk]; |
| 318 | bkQx = ImxQx[kk]; |
| 319 | |
| 320 | ccc2Q14 = kCosTabFfftQ14[ff]; |
| 321 | sss2Q14 = kCosTabFfftQ14[ee]; |
| 322 | |
| 323 | if (iSign==1) { |
| 324 | sss2Q14 = -sss2Q14; |
| 325 | } |
| 326 | |
| pbos@webrtc.org | 0946a56 | 2013-04-09 00:28:06 | [diff] [blame] | 327 | RexQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akQx, 14) - |
| 328 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkQx, 14); |
| 329 | ImxQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akQx, 14) + |
| 330 | (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkQx, 14); |
| niklase@google.com | 470e71d | 2011-07-07 08:21:25 | [diff] [blame] | 331 | |
| 332 | kk += 20; |
| 333 | } |
| 334 | kk = kk - 236; |
| 335 | // Complexity: 12*(12+12) = 288 for 16x16 muls, but 12*(12+32) = 528 cycles for 16x32 muls |
| 336 | } |
| 337 | kk = kk - 19; |
| 338 | // Complexity: 4*288+6 for 16x16 muls, but 4*528+6 cycles for 16x32 muls |
| 339 | } |
| 340 | // Complexity: 3*4*288+6 = 3462 for 16x16 muls, but 3*4*528+6 = 6342 cycles for 16x32 muls |
| 341 | |
| 342 | |
| 343 | // last transform for factor of 4 */ |
| 344 | for (kk=0; kk<240; kk=kk+4) { |
| 345 | k1 = kk + 1; |
| 346 | k2 = k1 + 1; |
| 347 | k3 = k2 + 1; |
| 348 | |
| 349 | akpQx = RexQx[kk] + RexQx[k2]; |
| 350 | akmQx = RexQx[kk] - RexQx[k2]; |
| 351 | ajpQx = RexQx[k1] + RexQx[k3]; |
| 352 | ajmQx = RexQx[k1] - RexQx[k3]; |
| 353 | bkpQx = ImxQx[kk] + ImxQx[k2]; |
| 354 | bkmQx = ImxQx[kk] - ImxQx[k2]; |
| 355 | bjpQx = ImxQx[k1] + ImxQx[k3]; |
| 356 | bjmQx = ImxQx[k1] - ImxQx[k3]; |
| 357 | RexQx[kk] = akpQx + ajpQx; |
| 358 | ImxQx[kk] = bkpQx + bjpQx; |
| 359 | ajpQx = akpQx - ajpQx; |
| 360 | bjpQx = bkpQx - bjpQx; |
| 361 | if (iSign < 0) { |
| 362 | akpQx = akmQx + bjmQx; |
| 363 | bkpQx = bkmQx - ajmQx; |
| 364 | akmQx -= bjmQx; |
| 365 | bkmQx += ajmQx; |
| 366 | } else { |
| 367 | akpQx = akmQx - bjmQx; |
| 368 | bkpQx = bkmQx + ajmQx; |
| 369 | akmQx += bjmQx; |
| 370 | bkmQx -= ajmQx; |
| 371 | } |
| 372 | RexQx[k1] = akpQx; |
| 373 | RexQx[k2] = ajpQx; |
| 374 | RexQx[k3] = akmQx; |
| 375 | ImxQx[k1] = bkpQx; |
| 376 | ImxQx[k2] = bjpQx; |
| 377 | ImxQx[k3] = bkmQx; |
| 378 | } |
| 379 | // Complexity: 60*45 = 2700 for 16x16 muls, but 60*45 = 2700 cycles for 16x32 muls |
| 380 | |
| 381 | /* permute the results to normal order */ |
| 382 | for (ii=0; ii<240; ii++) { |
| 383 | ReDATAQx[ii]=RexQx[ii]; |
| 384 | ImDATAQx[ii]=ImxQx[ii]; |
| 385 | } |
| 386 | // Complexity: 240*2=480 cycles |
| 387 | |
| 388 | for (ii=0; ii<240; ii++) { |
| 389 | RexQx[ii]=ReDATAQx[kSortTabFft[ii]]; |
| 390 | ImxQx[ii]=ImDATAQx[kSortTabFft[ii]]; |
| 391 | } |
| 392 | // Complexity: 240*2*2=960 cycles |
| 393 | |
| 394 | // Total complexity: |
| 395 | // 16x16 16x32 |
| 396 | // Complexity: 10 10 |
| 397 | // Complexity: 99 171 |
| 398 | // Complexity: 2972 3932 |
| 399 | // Complexity: 2736 5776 |
| 400 | // Complexity: 5472 10080 |
| 401 | // Complexity: 3462 6342 |
| 402 | // Complexity: 2700 2700 |
| 403 | // Complexity: 480 480 |
| 404 | // Complexity: 960 960 |
| 405 | // ======================= |
| 406 | // 18891 30451 |
| 407 | // |
| 408 | // If this FFT is called 2 time each frame, i.e. 67 times per second, it will correspond to |
| 409 | // a C54 complexity of 67*18891/1000000 = 1.27 MIPS with 16x16-muls, and 67*30451/1000000 = |
| 410 | // = 2.04 MIPS with 16x32-muls. Note that this routine somtimes is called 6 times during the |
| 411 | // encoding of a frame, i.e. the max complexity would be 7/2*1.27 = 4.4 MIPS for the 16x16 mul case. |
| 412 | |
| 413 | |
| 414 | return 0; |
| 415 | } |