STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_cfft_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_cfft_f32.c
9 *
10 * Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 #include "arm_common_tables.h"
43 
44 extern void arm_radix8_butterfly_f32(
45  float32_t * pSrc,
46  uint16_t fftLen,
47  const float32_t * pCoef,
48  uint16_t twidCoefModifier);
49 
50 extern void arm_bitreversal_32(
51  uint32_t * pSrc,
52  const uint16_t bitRevLen,
53  const uint16_t * pBitRevTable);
54 
208 {
209  uint32_t L = S->fftLen;
210  float32_t * pCol1, * pCol2, * pMid1, * pMid2;
211  float32_t * p2 = p1 + L;
212  const float32_t * tw = (float32_t *) S->pTwiddle;
213  float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
214  float32_t m0, m1, m2, m3;
215  uint32_t l;
216 
217  pCol1 = p1;
218  pCol2 = p2;
219 
220  // Define new length
221  L >>= 1;
222  // Initialize mid pointers
223  pMid1 = p1 + L;
224  pMid2 = p2 + L;
225 
226  // do two dot Fourier transform
227  for ( l = L >> 2; l > 0; l-- )
228  {
229  t1[0] = p1[0];
230  t1[1] = p1[1];
231  t1[2] = p1[2];
232  t1[3] = p1[3];
233 
234  t2[0] = p2[0];
235  t2[1] = p2[1];
236  t2[2] = p2[2];
237  t2[3] = p2[3];
238 
239  t3[0] = pMid1[0];
240  t3[1] = pMid1[1];
241  t3[2] = pMid1[2];
242  t3[3] = pMid1[3];
243 
244  t4[0] = pMid2[0];
245  t4[1] = pMid2[1];
246  t4[2] = pMid2[2];
247  t4[3] = pMid2[3];
248 
249  *p1++ = t1[0] + t2[0];
250  *p1++ = t1[1] + t2[1];
251  *p1++ = t1[2] + t2[2];
252  *p1++ = t1[3] + t2[3]; // col 1
253 
254  t2[0] = t1[0] - t2[0];
255  t2[1] = t1[1] - t2[1];
256  t2[2] = t1[2] - t2[2];
257  t2[3] = t1[3] - t2[3]; // for col 2
258 
259  *pMid1++ = t3[0] + t4[0];
260  *pMid1++ = t3[1] + t4[1];
261  *pMid1++ = t3[2] + t4[2];
262  *pMid1++ = t3[3] + t4[3]; // col 1
263 
264  t4[0] = t4[0] - t3[0];
265  t4[1] = t4[1] - t3[1];
266  t4[2] = t4[2] - t3[2];
267  t4[3] = t4[3] - t3[3]; // for col 2
268 
269  twR = *tw++;
270  twI = *tw++;
271 
272  // multiply by twiddle factors
273  m0 = t2[0] * twR;
274  m1 = t2[1] * twI;
275  m2 = t2[1] * twR;
276  m3 = t2[0] * twI;
277 
278  // R = R * Tr - I * Ti
279  *p2++ = m0 + m1;
280  // I = I * Tr + R * Ti
281  *p2++ = m2 - m3;
282 
283  // use vertical symmetry
284  // 0.9988 - 0.0491i <==> -0.0491 - 0.9988i
285  m0 = t4[0] * twI;
286  m1 = t4[1] * twR;
287  m2 = t4[1] * twI;
288  m3 = t4[0] * twR;
289 
290  *pMid2++ = m0 - m1;
291  *pMid2++ = m2 + m3;
292 
293  twR = *tw++;
294  twI = *tw++;
295 
296  m0 = t2[2] * twR;
297  m1 = t2[3] * twI;
298  m2 = t2[3] * twR;
299  m3 = t2[2] * twI;
300 
301  *p2++ = m0 + m1;
302  *p2++ = m2 - m3;
303 
304  m0 = t4[2] * twI;
305  m1 = t4[3] * twR;
306  m2 = t4[3] * twI;
307  m3 = t4[2] * twR;
308 
309  *pMid2++ = m0 - m1;
310  *pMid2++ = m2 + m3;
311  }
312 
313  // first col
314  arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2u);
315  // second col
316  arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2u);
317 }
318 
320 {
321  uint32_t L = S->fftLen >> 1;
322  float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
323  const float32_t *tw2, *tw3, *tw4;
324  float32_t * p2 = p1 + L;
325  float32_t * p3 = p2 + L;
326  float32_t * p4 = p3 + L;
327  float32_t t2[4], t3[4], t4[4], twR, twI;
328  float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;
329  float32_t m0, m1, m2, m3;
330  uint32_t l, twMod2, twMod3, twMod4;
331 
332  pCol1 = p1; // points to real values by default
333  pCol2 = p2;
334  pCol3 = p3;
335  pCol4 = p4;
336  pEnd1 = p2 - 1; // points to imaginary values by default
337  pEnd2 = p3 - 1;
338  pEnd3 = p4 - 1;
339  pEnd4 = pEnd3 + L;
340 
341  tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;
342 
343  L >>= 1;
344 
345  // do four dot Fourier transform
346 
347  twMod2 = 2;
348  twMod3 = 4;
349  twMod4 = 6;
350 
351  // TOP
352  p1ap3_0 = p1[0] + p3[0];
353  p1sp3_0 = p1[0] - p3[0];
354  p1ap3_1 = p1[1] + p3[1];
355  p1sp3_1 = p1[1] - p3[1];
356 
357  // col 2
358  t2[0] = p1sp3_0 + p2[1] - p4[1];
359  t2[1] = p1sp3_1 - p2[0] + p4[0];
360  // col 3
361  t3[0] = p1ap3_0 - p2[0] - p4[0];
362  t3[1] = p1ap3_1 - p2[1] - p4[1];
363  // col 4
364  t4[0] = p1sp3_0 - p2[1] + p4[1];
365  t4[1] = p1sp3_1 + p2[0] - p4[0];
366  // col 1
367  *p1++ = p1ap3_0 + p2[0] + p4[0];
368  *p1++ = p1ap3_1 + p2[1] + p4[1];
369 
370  // Twiddle factors are ones
371  *p2++ = t2[0];
372  *p2++ = t2[1];
373  *p3++ = t3[0];
374  *p3++ = t3[1];
375  *p4++ = t4[0];
376  *p4++ = t4[1];
377 
378  tw2 += twMod2;
379  tw3 += twMod3;
380  tw4 += twMod4;
381 
382  for (l = (L - 2) >> 1; l > 0; l-- )
383  {
384  // TOP
385  p1ap3_0 = p1[0] + p3[0];
386  p1sp3_0 = p1[0] - p3[0];
387  p1ap3_1 = p1[1] + p3[1];
388  p1sp3_1 = p1[1] - p3[1];
389  // col 2
390  t2[0] = p1sp3_0 + p2[1] - p4[1];
391  t2[1] = p1sp3_1 - p2[0] + p4[0];
392  // col 3
393  t3[0] = p1ap3_0 - p2[0] - p4[0];
394  t3[1] = p1ap3_1 - p2[1] - p4[1];
395  // col 4
396  t4[0] = p1sp3_0 - p2[1] + p4[1];
397  t4[1] = p1sp3_1 + p2[0] - p4[0];
398  // col 1 - top
399  *p1++ = p1ap3_0 + p2[0] + p4[0];
400  *p1++ = p1ap3_1 + p2[1] + p4[1];
401 
402  // BOTTOM
403  p1ap3_1 = pEnd1[-1] + pEnd3[-1];
404  p1sp3_1 = pEnd1[-1] - pEnd3[-1];
405  p1ap3_0 = pEnd1[0] + pEnd3[0];
406  p1sp3_0 = pEnd1[0] - pEnd3[0];
407  // col 2
408  t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1;
409  t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
410  // col 3
411  t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
412  t3[3] = p1ap3_0 - pEnd2[0] - pEnd4[0];
413  // col 4
414  t4[2] = pEnd2[0] - pEnd4[0] - p1sp3_1;
415  t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
416  // col 1 - Bottom
417  *pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0];
418  *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];
419 
420  // COL 2
421  // read twiddle factors
422  twR = *tw2++;
423  twI = *tw2++;
424  // multiply by twiddle factors
425  // let Z1 = a + i(b), Z2 = c + i(d)
426  // => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d)
427 
428  // Top
429  m0 = t2[0] * twR;
430  m1 = t2[1] * twI;
431  m2 = t2[1] * twR;
432  m3 = t2[0] * twI;
433 
434  *p2++ = m0 + m1;
435  *p2++ = m2 - m3;
436  // use vertical symmetry col 2
437  // 0.9997 - 0.0245i <==> 0.0245 - 0.9997i
438  // Bottom
439  m0 = t2[3] * twI;
440  m1 = t2[2] * twR;
441  m2 = t2[2] * twI;
442  m3 = t2[3] * twR;
443 
444  *pEnd2-- = m0 - m1;
445  *pEnd2-- = m2 + m3;
446 
447  // COL 3
448  twR = tw3[0];
449  twI = tw3[1];
450  tw3 += twMod3;
451  // Top
452  m0 = t3[0] * twR;
453  m1 = t3[1] * twI;
454  m2 = t3[1] * twR;
455  m3 = t3[0] * twI;
456 
457  *p3++ = m0 + m1;
458  *p3++ = m2 - m3;
459  // use vertical symmetry col 3
460  // 0.9988 - 0.0491i <==> -0.9988 - 0.0491i
461  // Bottom
462  m0 = -t3[3] * twR;
463  m1 = t3[2] * twI;
464  m2 = t3[2] * twR;
465  m3 = t3[3] * twI;
466 
467  *pEnd3-- = m0 - m1;
468  *pEnd3-- = m3 - m2;
469 
470  // COL 4
471  twR = tw4[0];
472  twI = tw4[1];
473  tw4 += twMod4;
474  // Top
475  m0 = t4[0] * twR;
476  m1 = t4[1] * twI;
477  m2 = t4[1] * twR;
478  m3 = t4[0] * twI;
479 
480  *p4++ = m0 + m1;
481  *p4++ = m2 - m3;
482  // use vertical symmetry col 4
483  // 0.9973 - 0.0736i <==> -0.0736 + 0.9973i
484  // Bottom
485  m0 = t4[3] * twI;
486  m1 = t4[2] * twR;
487  m2 = t4[2] * twI;
488  m3 = t4[3] * twR;
489 
490  *pEnd4-- = m0 - m1;
491  *pEnd4-- = m2 + m3;
492  }
493 
494  //MIDDLE
495  // Twiddle factors are
496  // 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i
497  p1ap3_0 = p1[0] + p3[0];
498  p1sp3_0 = p1[0] - p3[0];
499  p1ap3_1 = p1[1] + p3[1];
500  p1sp3_1 = p1[1] - p3[1];
501 
502  // col 2
503  t2[0] = p1sp3_0 + p2[1] - p4[1];
504  t2[1] = p1sp3_1 - p2[0] + p4[0];
505  // col 3
506  t3[0] = p1ap3_0 - p2[0] - p4[0];
507  t3[1] = p1ap3_1 - p2[1] - p4[1];
508  // col 4
509  t4[0] = p1sp3_0 - p2[1] + p4[1];
510  t4[1] = p1sp3_1 + p2[0] - p4[0];
511  // col 1 - Top
512  *p1++ = p1ap3_0 + p2[0] + p4[0];
513  *p1++ = p1ap3_1 + p2[1] + p4[1];
514 
515  // COL 2
516  twR = tw2[0];
517  twI = tw2[1];
518 
519  m0 = t2[0] * twR;
520  m1 = t2[1] * twI;
521  m2 = t2[1] * twR;
522  m3 = t2[0] * twI;
523 
524  *p2++ = m0 + m1;
525  *p2++ = m2 - m3;
526  // COL 3
527  twR = tw3[0];
528  twI = tw3[1];
529 
530  m0 = t3[0] * twR;
531  m1 = t3[1] * twI;
532  m2 = t3[1] * twR;
533  m3 = t3[0] * twI;
534 
535  *p3++ = m0 + m1;
536  *p3++ = m2 - m3;
537  // COL 4
538  twR = tw4[0];
539  twI = tw4[1];
540 
541  m0 = t4[0] * twR;
542  m1 = t4[1] * twI;
543  m2 = t4[1] * twR;
544  m3 = t4[0] * twI;
545 
546  *p4++ = m0 + m1;
547  *p4++ = m2 - m3;
548 
549  // first col
550  arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4u);
551  // second col
552  arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4u);
553  // third col
554  arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4u);
555  // fourth col
556  arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4u);
557 }
558 
575  const arm_cfft_instance_f32 * S,
576  float32_t * p1,
577  uint8_t ifftFlag,
578  uint8_t bitReverseFlag)
579 {
580  uint32_t L = S->fftLen, l;
581  float32_t invL, * pSrc;
582 
583  if(ifftFlag == 1u)
584  {
585  /* Conjugate input data */
586  pSrc = p1 + 1;
587  for(l=0; l<L; l++)
588  {
589  *pSrc = -*pSrc;
590  pSrc += 2;
591  }
592  }
593 
594  switch (L)
595  {
596  case 16:
597  case 128:
598  case 1024:
600  break;
601  case 32:
602  case 256:
603  case 2048:
605  break;
606  case 64:
607  case 512:
608  case 4096:
609  arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1);
610  break;
611  }
612 
613  if( bitReverseFlag )
614  arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
615 
616  if(ifftFlag == 1u)
617  {
618  invL = 1.0f/(float32_t)L;
619  /* Conjugate and scale output data */
620  pSrc = p1;
621  for(l=0; l<L; l++)
622  {
623  *pSrc++ *= invL ;
624  *pSrc = -(*pSrc) * invL;
625  pSrc++;
626  }
627  }
628 }
629 
void arm_cfft_f32(const arm_cfft_instance_f32 *S, float32_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
Processing function for the floating-point complex FFT.
Definition: arm_cfft_f32.c:574
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
const uint16_t * pBitRevTable
Definition: arm_math.h:2145
Instance structure for the floating-point CFFT/CIFFT function.
Definition: arm_math.h:2141
void arm_cfft_radix8by2_f32(arm_cfft_instance_f32 *S, float32_t *p1)
Definition: arm_cfft_f32.c:207
void arm_bitreversal_32(uint32_t *pSrc, const uint16_t bitRevLen, const uint16_t *pBitRevTable)
void arm_cfft_radix8by4_f32(arm_cfft_instance_f32 *S, float32_t *p1)
Definition: arm_cfft_f32.c:319
void arm_radix8_butterfly_f32(float32_t *pSrc, uint16_t fftLen, const float32_t *pCoef, uint16_t twidCoefModifier)
const float32_t * pTwiddle
Definition: arm_math.h:2144
uint32_t ifftFlag
Definition: FFT.c:112