STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_cfft_radix4_q31.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_cfft_radix4_q31.c
9 *
10 * Description: This file has function definition of Radix-4 FFT & IFFT function and
11 * In-place bit reversal using bit reversal table
12 *
13 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 * - Neither the name of ARM LIMITED nor the names of its contributors
25 * may be used to endorse or promote products derived from this
26 * software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 * -------------------------------------------------------------------- */
41 
42 #include "arm_math.h"
43 
45 q31_t * pSrc,
46 uint32_t fftLen,
47 q31_t * pCoef,
48 uint32_t twidCoefModifier);
49 
51 q31_t * pSrc,
52 uint32_t fftLen,
53 q31_t * pCoef,
54 uint32_t twidCoefModifier);
55 
57 q31_t * pSrc,
58 uint32_t fftLen,
59 uint16_t bitRevFactor,
60 uint16_t * pBitRevTab);
61 
92  q31_t * pSrc)
93 {
94  if(S->ifftFlag == 1u)
95  {
96  /* Complex IFFT radix-4 */
98  S->twidCoefModifier);
99  }
100  else
101  {
102  /* Complex FFT radix-4 */
104  S->twidCoefModifier);
105  }
106 
107 
108  if(S->bitReverseFlag == 1u)
109  {
110  /* Bit Reversal */
112  }
113 
114 }
115 
120 /*
121 * Radix-4 FFT algorithm used is :
122 *
123 * Input real and imaginary data:
124 * x(n) = xa + j * ya
125 * x(n+N/4 ) = xb + j * yb
126 * x(n+N/2 ) = xc + j * yc
127 * x(n+3N 4) = xd + j * yd
128 *
129 *
130 * Output real and imaginary data:
131 * x(4r) = xa'+ j * ya'
132 * x(4r+1) = xb'+ j * yb'
133 * x(4r+2) = xc'+ j * yc'
134 * x(4r+3) = xd'+ j * yd'
135 *
136 *
137 * Twiddle factors for radix-4 FFT:
138 * Wn = co1 + j * (- si1)
139 * W2n = co2 + j * (- si2)
140 * W3n = co3 + j * (- si3)
141 *
142 * Butterfly implementation:
143 * xa' = xa + xb + xc + xd
144 * ya' = ya + yb + yc + yd
145 * xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
146 * yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
147 * xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
148 * yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
149 * xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
150 * yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
151 *
152 */
153 
164  q31_t * pSrc,
165  uint32_t fftLen,
166  q31_t * pCoef,
167  uint32_t twidCoefModifier)
168 {
169 #if defined(ARM_MATH_CM7)
170  uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
171  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
172 
173  q31_t xa, xb, xc, xd;
174  q31_t ya, yb, yc, yd;
175  q31_t xa_out, xb_out, xc_out, xd_out;
176  q31_t ya_out, yb_out, yc_out, yd_out;
177 
178  q31_t *ptr1;
179  q63_t xaya, xbyb, xcyc, xdyd;
180  /* Total process is divided into three stages */
181 
182  /* process first stage, middle stages, & last stage */
183 
184 
185  /* start of first stage process */
186 
187  /* Initializations for the first stage */
188  n2 = fftLen;
189  n1 = n2;
190  /* n2 = fftLen/4 */
191  n2 >>= 2u;
192  i0 = 0u;
193  ia1 = 0u;
194 
195  j = n2;
196 
197  /* Calculation of first stage */
198  do
199  {
200  /* index calculation for the input as, */
201  /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
202  i1 = i0 + n2;
203  i2 = i1 + n2;
204  i3 = i2 + n2;
205 
206  /* input is in 1.31(q31) format and provide 4 guard bits for the input */
207 
208  /* Butterfly implementation */
209  /* xa + xc */
210  r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
211  /* xa - xc */
212  r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
213 
214  /* xb + xd */
215  t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
216 
217  /* ya + yc */
218  s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
219  /* ya - yc */
220  s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
221 
222  /* xa' = xa + xb + xc + xd */
223  pSrc[2u * i0] = (r1 + t1);
224  /* (xa + xc) - (xb + xd) */
225  r1 = r1 - t1;
226  /* yb + yd */
227  t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
228 
229  /* ya' = ya + yb + yc + yd */
230  pSrc[(2u * i0) + 1u] = (s1 + t2);
231 
232  /* (ya + yc) - (yb + yd) */
233  s1 = s1 - t2;
234 
235  /* yb - yd */
236  t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
237  /* xb - xd */
238  t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
239 
240  /* index calculation for the coefficients */
241  ia2 = 2u * ia1;
242  co2 = pCoef[ia2 * 2u];
243  si2 = pCoef[(ia2 * 2u) + 1u];
244 
245  /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
246  pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
247  ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
248 
249  /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
250  pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
251  ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
252 
253  /* (xa - xc) + (yb - yd) */
254  r1 = r2 + t1;
255  /* (xa - xc) - (yb - yd) */
256  r2 = r2 - t1;
257 
258  /* (ya - yc) - (xb - xd) */
259  s1 = s2 - t2;
260  /* (ya - yc) + (xb - xd) */
261  s2 = s2 + t2;
262 
263  co1 = pCoef[ia1 * 2u];
264  si1 = pCoef[(ia1 * 2u) + 1u];
265 
266  /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
267  pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
268  ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
269 
270  /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
271  pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
272  ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
273 
274  /* index calculation for the coefficients */
275  ia3 = 3u * ia1;
276  co3 = pCoef[ia3 * 2u];
277  si3 = pCoef[(ia3 * 2u) + 1u];
278 
279  /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
280  pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
281  ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
282 
283  /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
284  pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
285  ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
286 
287  /* Twiddle coefficients index modifier */
288  ia1 = ia1 + twidCoefModifier;
289 
290  /* Updating input index */
291  i0 = i0 + 1u;
292 
293  } while(--j);
294 
295  /* end of first stage process */
296 
297  /* data is in 5.27(q27) format */
298 
299 
300  /* start of Middle stages process */
301 
302 
303  /* each stage in middle stages provides two down scaling of the input */
304 
305  twidCoefModifier <<= 2u;
306 
307 
308  for (k = fftLen / 4u; k > 4u; k >>= 2u)
309  {
310  /* Initializations for the first stage */
311  n1 = n2;
312  n2 >>= 2u;
313  ia1 = 0u;
314 
315  /* Calculation of first stage */
316  for (j = 0u; j <= (n2 - 1u); j++)
317  {
318  /* index calculation for the coefficients */
319  ia2 = ia1 + ia1;
320  ia3 = ia2 + ia1;
321  co1 = pCoef[ia1 * 2u];
322  si1 = pCoef[(ia1 * 2u) + 1u];
323  co2 = pCoef[ia2 * 2u];
324  si2 = pCoef[(ia2 * 2u) + 1u];
325  co3 = pCoef[ia3 * 2u];
326  si3 = pCoef[(ia3 * 2u) + 1u];
327  /* Twiddle coefficients index modifier */
328  ia1 = ia1 + twidCoefModifier;
329 
330  for (i0 = j; i0 < fftLen; i0 += n1)
331  {
332  /* index calculation for the input as, */
333  /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
334  i1 = i0 + n2;
335  i2 = i1 + n2;
336  i3 = i2 + n2;
337 
338  /* Butterfly implementation */
339  /* xa + xc */
340  r1 = pSrc[2u * i0] + pSrc[2u * i2];
341  /* xa - xc */
342  r2 = pSrc[2u * i0] - pSrc[2u * i2];
343 
344  /* ya + yc */
345  s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
346  /* ya - yc */
347  s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
348 
349  /* xb + xd */
350  t1 = pSrc[2u * i1] + pSrc[2u * i3];
351 
352  /* xa' = xa + xb + xc + xd */
353  pSrc[2u * i0] = (r1 + t1) >> 2u;
354  /* xa + xc -(xb + xd) */
355  r1 = r1 - t1;
356 
357  /* yb + yd */
358  t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
359  /* ya' = ya + yb + yc + yd */
360  pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
361 
362  /* (ya + yc) - (yb + yd) */
363  s1 = s1 - t2;
364 
365  /* (yb - yd) */
366  t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
367  /* (xb - xd) */
368  t2 = pSrc[2u * i1] - pSrc[2u * i3];
369 
370  /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
371  pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
372  ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
373 
374  /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
375  pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
376  ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
377 
378  /* (xa - xc) + (yb - yd) */
379  r1 = r2 + t1;
380  /* (xa - xc) - (yb - yd) */
381  r2 = r2 - t1;
382 
383  /* (ya - yc) - (xb - xd) */
384  s1 = s2 - t2;
385  /* (ya - yc) + (xb - xd) */
386  s2 = s2 + t2;
387 
388  /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
389  pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
390  ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
391 
392  /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
393  pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
394  ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
395 
396  /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
397  pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
398  ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
399 
400  /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
401  pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
402  ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
403  }
404  }
405  twidCoefModifier <<= 2u;
406  }
407 #else
408  uint32_t n1, n2, ia1, ia2, ia3, i0, j, k;
409  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
410 
411  q31_t xa, xb, xc, xd;
412  q31_t ya, yb, yc, yd;
413  q31_t xa_out, xb_out, xc_out, xd_out;
414  q31_t ya_out, yb_out, yc_out, yd_out;
415 
416  q31_t *ptr1;
417  q31_t *pSi0;
418  q31_t *pSi1;
419  q31_t *pSi2;
420  q31_t *pSi3;
421  q63_t xaya, xbyb, xcyc, xdyd;
422  /* Total process is divided into three stages */
423 
424  /* process first stage, middle stages, & last stage */
425 
426 
427  /* start of first stage process */
428 
429  /* Initializations for the first stage */
430  n2 = fftLen;
431  n1 = n2;
432  /* n2 = fftLen/4 */
433  n2 >>= 2u;
434 
435  ia1 = 0u;
436 
437  j = n2;
438 
439  pSi0 = pSrc;
440  pSi1 = pSi0 + 2 * n2;
441  pSi2 = pSi1 + 2 * n2;
442  pSi3 = pSi2 + 2 * n2;
443 
444  /* Calculation of first stage */
445  do
446  {
447  /* input is in 1.31(q31) format and provide 4 guard bits for the input */
448 
449  /* Butterfly implementation */
450  /* xa + xc */
451  r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u);
452  /* xa - xc */
453  r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u);
454 
455  /* xb + xd */
456  t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u);
457 
458  /* ya + yc */
459  s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u);
460  /* ya - yc */
461  s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u);
462 
463  /* xa' = xa + xb + xc + xd */
464  *pSi0++ = (r1 + t1);
465  /* (xa + xc) - (xb + xd) */
466  r1 = r1 - t1;
467  /* yb + yd */
468  t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u);
469 
470  /* ya' = ya + yb + yc + yd */
471  *pSi0++ = (s1 + t2);
472 
473  /* (ya + yc) - (yb + yd) */
474  s1 = s1 - t2;
475 
476  /* yb - yd */
477  t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u);
478  /* xb - xd */
479  t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u);
480 
481  /* index calculation for the coefficients */
482  ia2 = 2u * ia1;
483  co2 = pCoef[ia2 * 2u];
484  si2 = pCoef[(ia2 * 2u) + 1u];
485 
486  /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
487  *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
488  ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
489 
490  /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
491  *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
492  ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
493 
494  /* (xa - xc) + (yb - yd) */
495  r1 = r2 + t1;
496  /* (xa - xc) - (yb - yd) */
497  r2 = r2 - t1;
498 
499  /* (ya - yc) - (xb - xd) */
500  s1 = s2 - t2;
501  /* (ya - yc) + (xb - xd) */
502  s2 = s2 + t2;
503 
504  co1 = pCoef[ia1 * 2u];
505  si1 = pCoef[(ia1 * 2u) + 1u];
506 
507  /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
508  *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
509  ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
510 
511  /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
512  *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
513  ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
514 
515  /* index calculation for the coefficients */
516  ia3 = 3u * ia1;
517  co3 = pCoef[ia3 * 2u];
518  si3 = pCoef[(ia3 * 2u) + 1u];
519 
520  /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
521  *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
522  ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
523 
524  /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
525  *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
526  ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
527 
528  /* Twiddle coefficients index modifier */
529  ia1 = ia1 + twidCoefModifier;
530 
531  } while(--j);
532 
533  /* end of first stage process */
534 
535  /* data is in 5.27(q27) format */
536 
537 
538  /* start of Middle stages process */
539 
540 
541  /* each stage in middle stages provides two down scaling of the input */
542 
543  twidCoefModifier <<= 2u;
544 
545 
546  for (k = fftLen / 4u; k > 4u; k >>= 2u)
547  {
548  /* Initializations for the first stage */
549  n1 = n2;
550  n2 >>= 2u;
551  ia1 = 0u;
552 
553  /* Calculation of first stage */
554  for (j = 0u; j <= (n2 - 1u); j++)
555  {
556  /* index calculation for the coefficients */
557  ia2 = ia1 + ia1;
558  ia3 = ia2 + ia1;
559  co1 = pCoef[ia1 * 2u];
560  si1 = pCoef[(ia1 * 2u) + 1u];
561  co2 = pCoef[ia2 * 2u];
562  si2 = pCoef[(ia2 * 2u) + 1u];
563  co3 = pCoef[ia3 * 2u];
564  si3 = pCoef[(ia3 * 2u) + 1u];
565  /* Twiddle coefficients index modifier */
566  ia1 = ia1 + twidCoefModifier;
567 
568  pSi0 = pSrc + 2 * j;
569  pSi1 = pSi0 + 2 * n2;
570  pSi2 = pSi1 + 2 * n2;
571  pSi3 = pSi2 + 2 * n2;
572 
573  for (i0 = j; i0 < fftLen; i0 += n1)
574  {
575  /* Butterfly implementation */
576  /* xa + xc */
577  r1 = pSi0[0] + pSi2[0];
578 
579  /* xa - xc */
580  r2 = pSi0[0] - pSi2[0];
581 
582 
583  /* ya + yc */
584  s1 = pSi0[1] + pSi2[1];
585 
586  /* ya - yc */
587  s2 = pSi0[1] - pSi2[1];
588 
589 
590  /* xb + xd */
591  t1 = pSi1[0] + pSi3[0];
592 
593 
594  /* xa' = xa + xb + xc + xd */
595  pSi0[0] = (r1 + t1) >> 2u;
596  /* xa + xc -(xb + xd) */
597  r1 = r1 - t1;
598 
599  /* yb + yd */
600  t2 = pSi1[1] + pSi3[1];
601 
602  /* ya' = ya + yb + yc + yd */
603  pSi0[1] = (s1 + t2) >> 2u;
604  pSi0 += 2 * n1;
605 
606  /* (ya + yc) - (yb + yd) */
607  s1 = s1 - t2;
608 
609  /* (yb - yd) */
610  t1 = pSi1[1] - pSi3[1];
611 
612  /* (xb - xd) */
613  t2 = pSi1[0] - pSi3[0];
614 
615 
616  /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
617  pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
618  ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
619 
620  /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
621  pSi1[1] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
622  ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
623  pSi1 += 2 * n1;
624 
625  /* (xa - xc) + (yb - yd) */
626  r1 = r2 + t1;
627  /* (xa - xc) - (yb - yd) */
628  r2 = r2 - t1;
629 
630  /* (ya - yc) - (xb - xd) */
631  s1 = s2 - t2;
632  /* (ya - yc) + (xb - xd) */
633  s2 = s2 + t2;
634 
635  /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
636  pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
637  ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
638 
639  /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
640  pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
641  ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
642  pSi2 += 2 * n1;
643 
644  /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
645  pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
646  ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
647 
648  /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
649  pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
650  ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
651  pSi3 += 2 * n1;
652  }
653  }
654  twidCoefModifier <<= 2u;
655  }
656 #endif
657 
658  /* End of Middle stages process */
659 
660  /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
661  /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
662  /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
663  /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
664 
665 
666  /* start of Last stage process */
667  /* Initializations for the last stage */
668  j = fftLen >> 2;
669  ptr1 = &pSrc[0];
670 
671  /* Calculations of last stage */
672  do
673  {
674 
675 #ifndef ARM_MATH_BIG_ENDIAN
676 
677  /* Read xa (real), ya(imag) input */
678  xaya = *__SIMD64(ptr1)++;
679  xa = (q31_t) xaya;
680  ya = (q31_t) (xaya >> 32);
681 
682  /* Read xb (real), yb(imag) input */
683  xbyb = *__SIMD64(ptr1)++;
684  xb = (q31_t) xbyb;
685  yb = (q31_t) (xbyb >> 32);
686 
687  /* Read xc (real), yc(imag) input */
688  xcyc = *__SIMD64(ptr1)++;
689  xc = (q31_t) xcyc;
690  yc = (q31_t) (xcyc >> 32);
691 
692  /* Read xc (real), yc(imag) input */
693  xdyd = *__SIMD64(ptr1)++;
694  xd = (q31_t) xdyd;
695  yd = (q31_t) (xdyd >> 32);
696 
697 #else
698 
699  /* Read xa (real), ya(imag) input */
700  xaya = *__SIMD64(ptr1)++;
701  ya = (q31_t) xaya;
702  xa = (q31_t) (xaya >> 32);
703 
704  /* Read xb (real), yb(imag) input */
705  xbyb = *__SIMD64(ptr1)++;
706  yb = (q31_t) xbyb;
707  xb = (q31_t) (xbyb >> 32);
708 
709  /* Read xc (real), yc(imag) input */
710  xcyc = *__SIMD64(ptr1)++;
711  yc = (q31_t) xcyc;
712  xc = (q31_t) (xcyc >> 32);
713 
714  /* Read xc (real), yc(imag) input */
715  xdyd = *__SIMD64(ptr1)++;
716  yd = (q31_t) xdyd;
717  xd = (q31_t) (xdyd >> 32);
718 
719 
720 #endif
721 
722  /* xa' = xa + xb + xc + xd */
723  xa_out = xa + xb + xc + xd;
724 
725  /* ya' = ya + yb + yc + yd */
726  ya_out = ya + yb + yc + yd;
727 
728  /* pointer updation for writing */
729  ptr1 = ptr1 - 8u;
730 
731  /* writing xa' and ya' */
732  *ptr1++ = xa_out;
733  *ptr1++ = ya_out;
734 
735  xc_out = (xa - xb + xc - xd);
736  yc_out = (ya - yb + yc - yd);
737 
738  /* writing xc' and yc' */
739  *ptr1++ = xc_out;
740  *ptr1++ = yc_out;
741 
742  xb_out = (xa + yb - xc - yd);
743  yb_out = (ya - xb - yc + xd);
744 
745  /* writing xb' and yb' */
746  *ptr1++ = xb_out;
747  *ptr1++ = yb_out;
748 
749  xd_out = (xa - yb - xc + yd);
750  yd_out = (ya + xb - yc - xd);
751 
752  /* writing xd' and yd' */
753  *ptr1++ = xd_out;
754  *ptr1++ = yd_out;
755 
756 
757  } while(--j);
758 
759  /* output is in 11.21(q21) format for the 1024 point */
760  /* output is in 9.23(q23) format for the 256 point */
761  /* output is in 7.25(q25) format for the 64 point */
762  /* output is in 5.27(q27) format for the 16 point */
763 
764  /* End of last stage process */
765 
766 }
767 
768 
779 /*
780 * Radix-4 IFFT algorithm used is :
781 *
782 * CIFFT uses same twiddle coefficients as CFFT Function
783 * x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
784 *
785 *
786 * IFFT is implemented with following changes in equations from FFT
787 *
788 * Input real and imaginary data:
789 * x(n) = xa + j * ya
790 * x(n+N/4 ) = xb + j * yb
791 * x(n+N/2 ) = xc + j * yc
792 * x(n+3N 4) = xd + j * yd
793 *
794 *
795 * Output real and imaginary data:
796 * x(4r) = xa'+ j * ya'
797 * x(4r+1) = xb'+ j * yb'
798 * x(4r+2) = xc'+ j * yc'
799 * x(4r+3) = xd'+ j * yd'
800 *
801 *
802 * Twiddle factors for radix-4 IFFT:
803 * Wn = co1 + j * (si1)
804 * W2n = co2 + j * (si2)
805 * W3n = co3 + j * (si3)
806 
807 * The real and imaginary output values for the radix-4 butterfly are
808 * xa' = xa + xb + xc + xd
809 * ya' = ya + yb + yc + yd
810 * xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
811 * yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
812 * xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
813 * yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
814 * xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
815 * yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
816 *
817 */
818 
820  q31_t * pSrc,
821  uint32_t fftLen,
822  q31_t * pCoef,
823  uint32_t twidCoefModifier)
824 {
825 #if defined(ARM_MATH_CM7)
826  uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
827  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
828  q31_t xa, xb, xc, xd;
829  q31_t ya, yb, yc, yd;
830  q31_t xa_out, xb_out, xc_out, xd_out;
831  q31_t ya_out, yb_out, yc_out, yd_out;
832 
833  q31_t *ptr1;
834  q63_t xaya, xbyb, xcyc, xdyd;
835 
836  /* input is be 1.31(q31) format for all FFT sizes */
837  /* Total process is divided into three stages */
838  /* process first stage, middle stages, & last stage */
839 
840  /* Start of first stage process */
841 
842  /* Initializations for the first stage */
843  n2 = fftLen;
844  n1 = n2;
845  /* n2 = fftLen/4 */
846  n2 >>= 2u;
847  i0 = 0u;
848  ia1 = 0u;
849 
850  j = n2;
851 
852  do
853  {
854 
855  /* input is in 1.31(q31) format and provide 4 guard bits for the input */
856 
857  /* index calculation for the input as, */
858  /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
859  i1 = i0 + n2;
860  i2 = i1 + n2;
861  i3 = i2 + n2;
862 
863  /* Butterfly implementation */
864  /* xa + xc */
865  r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
866  /* xa - xc */
867  r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
868 
869  /* xb + xd */
870  t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
871 
872  /* ya + yc */
873  s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
874  /* ya - yc */
875  s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
876 
877  /* xa' = xa + xb + xc + xd */
878  pSrc[2u * i0] = (r1 + t1);
879  /* (xa + xc) - (xb + xd) */
880  r1 = r1 - t1;
881  /* yb + yd */
882  t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
883  /* ya' = ya + yb + yc + yd */
884  pSrc[(2u * i0) + 1u] = (s1 + t2);
885 
886  /* (ya + yc) - (yb + yd) */
887  s1 = s1 - t2;
888 
889  /* yb - yd */
890  t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
891  /* xb - xd */
892  t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
893 
894  /* index calculation for the coefficients */
895  ia2 = 2u * ia1;
896  co2 = pCoef[ia2 * 2u];
897  si2 = pCoef[(ia2 * 2u) + 1u];
898 
899  /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
900  pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
901  ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
902 
903  /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
904  pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
905  ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
906 
907  /* (xa - xc) - (yb - yd) */
908  r1 = r2 - t1;
909  /* (xa - xc) + (yb - yd) */
910  r2 = r2 + t1;
911 
912  /* (ya - yc) + (xb - xd) */
913  s1 = s2 + t2;
914  /* (ya - yc) - (xb - xd) */
915  s2 = s2 - t2;
916 
917  co1 = pCoef[ia1 * 2u];
918  si1 = pCoef[(ia1 * 2u) + 1u];
919 
920  /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
921  pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
922  ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
923 
924  /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
925  pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
926  ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
927 
928  /* index calculation for the coefficients */
929  ia3 = 3u * ia1;
930  co3 = pCoef[ia3 * 2u];
931  si3 = pCoef[(ia3 * 2u) + 1u];
932 
933  /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
934  pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
935  ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
936 
937  /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
938  pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
939  ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
940 
941  /* Twiddle coefficients index modifier */
942  ia1 = ia1 + twidCoefModifier;
943 
944  /* Updating input index */
945  i0 = i0 + 1u;
946 
947  } while(--j);
948 
949  /* data is in 5.27(q27) format */
950  /* each stage provides two down scaling of the input */
951 
952 
953  /* Start of Middle stages process */
954 
955  twidCoefModifier <<= 2u;
956 
957  /* Calculation of second stage to excluding last stage */
958  for (k = fftLen / 4u; k > 4u; k >>= 2u)
959  {
960  /* Initializations for the first stage */
961  n1 = n2;
962  n2 >>= 2u;
963  ia1 = 0u;
964 
965  for (j = 0; j <= (n2 - 1u); j++)
966  {
967  /* index calculation for the coefficients */
968  ia2 = ia1 + ia1;
969  ia3 = ia2 + ia1;
970  co1 = pCoef[ia1 * 2u];
971  si1 = pCoef[(ia1 * 2u) + 1u];
972  co2 = pCoef[ia2 * 2u];
973  si2 = pCoef[(ia2 * 2u) + 1u];
974  co3 = pCoef[ia3 * 2u];
975  si3 = pCoef[(ia3 * 2u) + 1u];
976  /* Twiddle coefficients index modifier */
977  ia1 = ia1 + twidCoefModifier;
978 
979  for (i0 = j; i0 < fftLen; i0 += n1)
980  {
981  /* index calculation for the input as, */
982  /* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
983  i1 = i0 + n2;
984  i2 = i1 + n2;
985  i3 = i2 + n2;
986 
987  /* Butterfly implementation */
988  /* xa + xc */
989  r1 = pSrc[2u * i0] + pSrc[2u * i2];
990  /* xa - xc */
991  r2 = pSrc[2u * i0] - pSrc[2u * i2];
992 
993  /* ya + yc */
994  s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
995  /* ya - yc */
996  s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
997 
998  /* xb + xd */
999  t1 = pSrc[2u * i1] + pSrc[2u * i3];
1000 
1001  /* xa' = xa + xb + xc + xd */
1002  pSrc[2u * i0] = (r1 + t1) >> 2u;
1003  /* xa + xc -(xb + xd) */
1004  r1 = r1 - t1;
1005  /* yb + yd */
1006  t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
1007  /* ya' = ya + yb + yc + yd */
1008  pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
1009 
1010  /* (ya + yc) - (yb + yd) */
1011  s1 = s1 - t2;
1012 
1013  /* (yb - yd) */
1014  t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
1015  /* (xb - xd) */
1016  t2 = pSrc[2u * i1] - pSrc[2u * i3];
1017 
1018  /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1019  pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
1020  ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
1021 
1022  /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1023  pSrc[(2u * i1) + 1u] =
1024  (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
1025  ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
1026 
1027  /* (xa - xc) - (yb - yd) */
1028  r1 = r2 - t1;
1029  /* (xa - xc) + (yb - yd) */
1030  r2 = r2 + t1;
1031 
1032  /* (ya - yc) + (xb - xd) */
1033  s1 = s2 + t2;
1034  /* (ya - yc) - (xb - xd) */
1035  s2 = s2 - t2;
1036 
1037  /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1038  pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
1039  ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
1040 
1041  /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1042  pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
1043  ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
1044 
1045  /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1046  pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
1047  ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
1048 
1049  /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1050  pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
1051  ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
1052  }
1053  }
1054  twidCoefModifier <<= 2u;
1055  }
1056 #else
1057  uint32_t n1, n2, ia1, ia2, ia3, i0, j, k;
1058  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
1059  q31_t xa, xb, xc, xd;
1060  q31_t ya, yb, yc, yd;
1061  q31_t xa_out, xb_out, xc_out, xd_out;
1062  q31_t ya_out, yb_out, yc_out, yd_out;
1063 
1064  q31_t *ptr1;
1065  q31_t *pSi0;
1066  q31_t *pSi1;
1067  q31_t *pSi2;
1068  q31_t *pSi3;
1069  q63_t xaya, xbyb, xcyc, xdyd;
1070 
1071  /* input is be 1.31(q31) format for all FFT sizes */
1072  /* Total process is divided into three stages */
1073  /* process first stage, middle stages, & last stage */
1074 
1075  /* Start of first stage process */
1076 
1077  /* Initializations for the first stage */
1078  n2 = fftLen;
1079  n1 = n2;
1080  /* n2 = fftLen/4 */
1081  n2 >>= 2u;
1082 
1083  ia1 = 0u;
1084 
1085  j = n2;
1086 
1087  pSi0 = pSrc;
1088  pSi1 = pSi0 + 2 * n2;
1089  pSi2 = pSi1 + 2 * n2;
1090  pSi3 = pSi2 + 2 * n2;
1091 
1092  do
1093  {
1094  /* Butterfly implementation */
1095  /* xa + xc */
1096  r1 = (pSi0[0] >> 4u) + (pSi2[0] >> 4u);
1097  /* xa - xc */
1098  r2 = (pSi0[0] >> 4u) - (pSi2[0] >> 4u);
1099 
1100  /* xb + xd */
1101  t1 = (pSi1[0] >> 4u) + (pSi3[0] >> 4u);
1102 
1103  /* ya + yc */
1104  s1 = (pSi0[1] >> 4u) + (pSi2[1] >> 4u);
1105  /* ya - yc */
1106  s2 = (pSi0[1] >> 4u) - (pSi2[1] >> 4u);
1107 
1108  /* xa' = xa + xb + xc + xd */
1109  *pSi0++ = (r1 + t1);
1110  /* (xa + xc) - (xb + xd) */
1111  r1 = r1 - t1;
1112  /* yb + yd */
1113  t2 = (pSi1[1] >> 4u) + (pSi3[1] >> 4u);
1114  /* ya' = ya + yb + yc + yd */
1115  *pSi0++ = (s1 + t2);
1116 
1117  /* (ya + yc) - (yb + yd) */
1118  s1 = s1 - t2;
1119 
1120  /* yb - yd */
1121  t1 = (pSi1[1] >> 4u) - (pSi3[1] >> 4u);
1122  /* xb - xd */
1123  t2 = (pSi1[0] >> 4u) - (pSi3[0] >> 4u);
1124 
1125  /* index calculation for the coefficients */
1126  ia2 = 2u * ia1;
1127  co2 = pCoef[ia2 * 2u];
1128  si2 = pCoef[(ia2 * 2u) + 1u];
1129 
1130  /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1131  *pSi1++ = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
1132  ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
1133 
1134  /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1135  *pSi1++ = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
1136  ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
1137 
1138  /* (xa - xc) - (yb - yd) */
1139  r1 = r2 - t1;
1140  /* (xa - xc) + (yb - yd) */
1141  r2 = r2 + t1;
1142 
1143  /* (ya - yc) + (xb - xd) */
1144  s1 = s2 + t2;
1145  /* (ya - yc) - (xb - xd) */
1146  s2 = s2 - t2;
1147 
1148  co1 = pCoef[ia1 * 2u];
1149  si1 = pCoef[(ia1 * 2u) + 1u];
1150 
1151  /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1152  *pSi2++ = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
1153  ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
1154 
1155  /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1156  *pSi2++ = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
1157  ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
1158 
1159  /* index calculation for the coefficients */
1160  ia3 = 3u * ia1;
1161  co3 = pCoef[ia3 * 2u];
1162  si3 = pCoef[(ia3 * 2u) + 1u];
1163 
1164  /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1165  *pSi3++ = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
1166  ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
1167 
1168  /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1169  *pSi3++ = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
1170  ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
1171 
1172  /* Twiddle coefficients index modifier */
1173  ia1 = ia1 + twidCoefModifier;
1174 
1175  } while(--j);
1176 
1177  /* data is in 5.27(q27) format */
1178  /* each stage provides two down scaling of the input */
1179 
1180 
1181  /* Start of Middle stages process */
1182 
1183  twidCoefModifier <<= 2u;
1184 
1185  /* Calculation of second stage to excluding last stage */
1186  for (k = fftLen / 4u; k > 4u; k >>= 2u)
1187  {
1188  /* Initializations for the first stage */
1189  n1 = n2;
1190  n2 >>= 2u;
1191  ia1 = 0u;
1192 
1193  for (j = 0; j <= (n2 - 1u); j++)
1194  {
1195  /* index calculation for the coefficients */
1196  ia2 = ia1 + ia1;
1197  ia3 = ia2 + ia1;
1198  co1 = pCoef[ia1 * 2u];
1199  si1 = pCoef[(ia1 * 2u) + 1u];
1200  co2 = pCoef[ia2 * 2u];
1201  si2 = pCoef[(ia2 * 2u) + 1u];
1202  co3 = pCoef[ia3 * 2u];
1203  si3 = pCoef[(ia3 * 2u) + 1u];
1204  /* Twiddle coefficients index modifier */
1205  ia1 = ia1 + twidCoefModifier;
1206 
1207  pSi0 = pSrc + 2 * j;
1208  pSi1 = pSi0 + 2 * n2;
1209  pSi2 = pSi1 + 2 * n2;
1210  pSi3 = pSi2 + 2 * n2;
1211 
1212  for (i0 = j; i0 < fftLen; i0 += n1)
1213  {
1214  /* Butterfly implementation */
1215  /* xa + xc */
1216  r1 = pSi0[0] + pSi2[0];
1217 
1218  /* xa - xc */
1219  r2 = pSi0[0] - pSi2[0];
1220 
1221 
1222  /* ya + yc */
1223  s1 = pSi0[1] + pSi2[1];
1224 
1225  /* ya - yc */
1226  s2 = pSi0[1] - pSi2[1];
1227 
1228 
1229  /* xb + xd */
1230  t1 = pSi1[0] + pSi3[0];
1231 
1232 
1233  /* xa' = xa + xb + xc + xd */
1234  pSi0[0] = (r1 + t1) >> 2u;
1235  /* xa + xc -(xb + xd) */
1236  r1 = r1 - t1;
1237  /* yb + yd */
1238  t2 = pSi1[1] + pSi3[1];
1239 
1240  /* ya' = ya + yb + yc + yd */
1241  pSi0[1] = (s1 + t2) >> 2u;
1242  pSi0 += 2 * n1;
1243 
1244  /* (ya + yc) - (yb + yd) */
1245  s1 = s1 - t2;
1246 
1247  /* (yb - yd) */
1248  t1 = pSi1[1] - pSi3[1];
1249 
1250  /* (xb - xd) */
1251  t2 = pSi1[0] - pSi3[0];
1252 
1253 
1254  /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
1255  pSi1[0] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
1256  ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
1257 
1258  /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
1259  pSi1[1] =
1260 
1261  (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
1262  ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
1263  pSi1 += 2 * n1;
1264 
1265  /* (xa - xc) - (yb - yd) */
1266  r1 = r2 - t1;
1267  /* (xa - xc) + (yb - yd) */
1268  r2 = r2 + t1;
1269 
1270  /* (ya - yc) + (xb - xd) */
1271  s1 = s2 + t2;
1272  /* (ya - yc) - (xb - xd) */
1273  s2 = s2 - t2;
1274 
1275  /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
1276  pSi2[0] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
1277  ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
1278 
1279  /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
1280  pSi2[1] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
1281  ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
1282  pSi2 += 2 * n1;
1283 
1284  /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
1285  pSi3[0] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
1286  ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
1287 
1288  /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
1289  pSi3[1] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
1290  ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
1291  pSi3 += 2 * n1;
1292  }
1293  }
1294  twidCoefModifier <<= 2u;
1295  }
1296 #endif
1297 
1298  /* End of Middle stages process */
1299 
1300  /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
1301  /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
1302  /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
1303  /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
1304 
1305 
1306  /* Start of last stage process */
1307 
1308 
1309  /* Initializations for the last stage */
1310  j = fftLen >> 2;
1311  ptr1 = &pSrc[0];
1312 
1313  /* Calculations of last stage */
1314  do
1315  {
1316 #ifndef ARM_MATH_BIG_ENDIAN
1317  /* Read xa (real), ya(imag) input */
1318  xaya = *__SIMD64(ptr1)++;
1319  xa = (q31_t) xaya;
1320  ya = (q31_t) (xaya >> 32);
1321 
1322  /* Read xb (real), yb(imag) input */
1323  xbyb = *__SIMD64(ptr1)++;
1324  xb = (q31_t) xbyb;
1325  yb = (q31_t) (xbyb >> 32);
1326 
1327  /* Read xc (real), yc(imag) input */
1328  xcyc = *__SIMD64(ptr1)++;
1329  xc = (q31_t) xcyc;
1330  yc = (q31_t) (xcyc >> 32);
1331 
1332  /* Read xc (real), yc(imag) input */
1333  xdyd = *__SIMD64(ptr1)++;
1334  xd = (q31_t) xdyd;
1335  yd = (q31_t) (xdyd >> 32);
1336 
1337 #else
1338 
1339  /* Read xa (real), ya(imag) input */
1340  xaya = *__SIMD64(ptr1)++;
1341  ya = (q31_t) xaya;
1342  xa = (q31_t) (xaya >> 32);
1343 
1344  /* Read xb (real), yb(imag) input */
1345  xbyb = *__SIMD64(ptr1)++;
1346  yb = (q31_t) xbyb;
1347  xb = (q31_t) (xbyb >> 32);
1348 
1349  /* Read xc (real), yc(imag) input */
1350  xcyc = *__SIMD64(ptr1)++;
1351  yc = (q31_t) xcyc;
1352  xc = (q31_t) (xcyc >> 32);
1353 
1354  /* Read xc (real), yc(imag) input */
1355  xdyd = *__SIMD64(ptr1)++;
1356  yd = (q31_t) xdyd;
1357  xd = (q31_t) (xdyd >> 32);
1358 
1359 
1360 #endif
1361 
1362  /* xa' = xa + xb + xc + xd */
1363  xa_out = xa + xb + xc + xd;
1364 
1365  /* ya' = ya + yb + yc + yd */
1366  ya_out = ya + yb + yc + yd;
1367 
1368  /* pointer updation for writing */
1369  ptr1 = ptr1 - 8u;
1370 
1371  /* writing xa' and ya' */
1372  *ptr1++ = xa_out;
1373  *ptr1++ = ya_out;
1374 
1375  xc_out = (xa - xb + xc - xd);
1376  yc_out = (ya - yb + yc - yd);
1377 
1378  /* writing xc' and yc' */
1379  *ptr1++ = xc_out;
1380  *ptr1++ = yc_out;
1381 
1382  xb_out = (xa - yb - xc + yd);
1383  yb_out = (ya + xb - yc - xd);
1384 
1385  /* writing xb' and yb' */
1386  *ptr1++ = xb_out;
1387  *ptr1++ = yb_out;
1388 
1389  xd_out = (xa + yb - xc - yd);
1390  yd_out = (ya - xb - yc + xd);
1391 
1392  /* writing xd' and yd' */
1393  *ptr1++ = xd_out;
1394  *ptr1++ = yd_out;
1395 
1396  } while(--j);
1397 
1398  /* output is in 11.21(q21) format for the 1024 point */
1399  /* output is in 9.23(q23) format for the 256 point */
1400  /* output is in 7.25(q25) format for the 64 point */
1401  /* output is in 5.27(q27) format for the 16 point */
1402 
1403  /* End of last stage process */
1404 }
void arm_radix4_butterfly_q31(q31_t *pSrc, uint32_t fftLen, q31_t *pCoef, uint32_t twidCoefModifier)
Core function for the Q31 CFFT butterfly process.
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
void arm_radix4_butterfly_inverse_q31(q31_t *pSrc, uint32_t fftLen, q31_t *pCoef, uint32_t twidCoefModifier)
Core function for the Q31 CIFFT butterfly process.
Instance structure for the Q31 CFFT/CIFFT function.
Definition: arm_math.h:2027
#define __SIMD64(addr)
Definition: arm_math.h:448
void arm_cfft_radix4_q31(const arm_cfft_radix4_instance_q31 *S, q31_t *pSrc)
Processing function for the Q31 CFFT/CIFFT.
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397
void arm_bitreversal_q31(q31_t *pSrc, uint32_t fftLen, uint16_t bitRevFactor, uint16_t *pBitRevTab)