STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_f32.c
9 *
10 * Description: Floating-point FIR filter processing function.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
134 #if defined(ARM_MATH_CM7)
135 
/**
 * Floating-point FIR filter; this variant is compiled when ARM_MATH_CM7 is
 * defined.
 *
 * Reads blockSize samples from pSrc and writes blockSize filtered samples to
 * pDst. Outputs are produced eight at a time with the tap loop unrolled by
 * eight; the remaining blockSize % 8 outputs are computed one at a time.
 *
 * @param S         Filter instance; numTaps, pCoeffs and pState are read.
 *                  On entry S->pState[0 .. numTaps-2] holds the samples kept
 *                  from the previous call. New input is appended behind them,
 *                  so the buffer must hold at least numTaps + blockSize - 1
 *                  values. On exit the newest numTaps - 1 samples have been
 *                  moved back to the start of the buffer.
 *                  pCoeffs[0] is applied to the oldest sample of each window.
 * @param pSrc      Input samples (blockSize values are read).
 * @param pDst      Output samples (blockSize values are written).
 * @param blockSize Number of samples to process.
 *
 * numTaps must be at least 1: numTaps - 1u wraps around for 0, and the scalar
 * tail loop always performs at least one multiply-accumulate.
 */
136 void arm_fir_f32(
137 const arm_fir_instance_f32 * S,
138 float32_t * pSrc,
139 float32_t * pDst,
140 uint32_t blockSize)
141 {
142  float32_t *pState = S->pState; /* Start of the sample window for the next output */
143  float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
144  float32_t *pStateCurnt; /* Write position for new input samples in the state buffer */
145  float32_t *px, *pb; /* Running read pointers into the state and coefficient buffers */
146  float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; /* One accumulator per output of a group of eight */
147  float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0; /* Eight-sample sliding window and the current coefficient */
148  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
149  uint32_t i, tapCnt, blkCnt; /* Loop counters */
150 
151  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
152  /* pStateCurnt points to the location where the new input data should be written */
153  pStateCurnt = &(S->pState[(numTaps - 1u)]);
154 
155  /* Apply loop unrolling and compute 8 output values simultaneously.
156  * The variables acc0 ... acc7 hold output values that are being computed
157  * (acc4 ... acc7 follow the same pattern, each shifted by one more sample):
158  * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
159  * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
160  * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
161  * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
162  */
163  blkCnt = blockSize >> 3;
164 
165  /* First part of the processing with loop unrolling. Compute 8 outputs at a time.
166  ** A second loop below computes the remaining 1 to 7 samples. */
167  while(blkCnt > 0u)
168  {
169  /* Copy the first four of the eight new input samples into the state buffer */
170  *pStateCurnt++ = *pSrc++;
171  *pStateCurnt++ = *pSrc++;
172  *pStateCurnt++ = *pSrc++;
173  *pStateCurnt++ = *pSrc++;
174 
175  /* Set all accumulators to zero */
176  acc0 = 0.0f;
177  acc1 = 0.0f;
178  acc2 = 0.0f;
179  acc3 = 0.0f;
180  acc4 = 0.0f;
181  acc5 = 0.0f;
182  acc6 = 0.0f;
183  acc7 = 0.0f;
184 
185  /* Initialize state pointer */
186  px = pState;
187 
188  /* Initialize coeff pointer */
189  pb = (pCoeffs);
190 
191  /* Copy the other four new input samples. This is separated from the
192  * first four to avoid a call to __aeabi_memmove which would be slower
193  */
194  *pStateCurnt++ = *pSrc++;
195  *pStateCurnt++ = *pSrc++;
196  *pStateCurnt++ = *pSrc++;
197  *pStateCurnt++ = *pSrc++;
198 
199  /* Read the first seven samples of the window; the eighth (x7) is fetched inside the tap loops */
200  x0 = *px++;
201  x1 = *px++;
202  x2 = *px++;
203  x3 = *px++;
204  x4 = *px++;
205  x5 = *px++;
206  x6 = *px++;
207 
208  /* Loop unrolling. Process 8 taps at a time. */
209  tapCnt = numTaps >> 3u;
210 
211  /* Loop over the number of taps. Unroll by a factor of 8.
212  ** Each pass consumes 8 coefficients; the loop runs numTaps / 8 times. */
213  while(tapCnt > 0u)
214  {
215  /* Read the 1st coefficient of this group of eight */
216  c0 = *(pb++);
217 
218  /* Read the eighth sample of the window; x0 ... x6 are already loaded */
219  x7 = *(px++);
220 
221  /* Accumulator k gets c0 times the sample k places into the window: acc0 uses x0 */
222  acc0 += x0 * c0;
223 
224  /* acc1 uses x1 */
225  acc1 += x1 * c0;
226 
227  /* acc2 uses x2 */
228  acc2 += x2 * c0;
229 
230  /* acc3 uses x3 */
231  acc3 += x3 * c0;
232 
233  /* acc4 uses x4 */
234  acc4 += x4 * c0;
235 
236  /* acc5 uses x5 */
237  acc5 += x5 * c0;
238 
239  /* acc6 uses x6 */
240  acc6 += x6 * c0;
241 
242  /* acc7 uses x7 */
243  acc7 += x7 * c0;
244 
245  /* Read the 2nd coefficient of the group */
246  c0 = *(pb++);
247 
248  /* Read the next sample into x0, whose previous value is no longer needed */
249  x0 = *(px++);
250 
251  /* Multiply-accumulate with the window advanced by one sample (x1 ... x7, x0) */
252  acc0 += x1 * c0;
253  acc1 += x2 * c0;
254  acc2 += x3 * c0;
255  acc3 += x4 * c0;
256  acc4 += x5 * c0;
257  acc5 += x6 * c0;
258  acc6 += x7 * c0;
259  acc7 += x0 * c0;
260 
261  /* Read the 3rd coefficient of the group */
262  c0 = *(pb++);
263 
264  /* Read the next sample into x1, replacing the oldest sample of the window */
265  x1 = *(px++);
266 
267  /* Multiply-accumulate; the window is now x2 ... x7, x0, x1 */
268  acc0 += x2 * c0;
269  acc1 += x3 * c0;
270  acc2 += x4 * c0;
271  acc3 += x5 * c0;
272  acc4 += x6 * c0;
273  acc5 += x7 * c0;
274  acc6 += x0 * c0;
275  acc7 += x1 * c0;
276 
277  /* Read the 4th coefficient of the group */
278  c0 = *(pb++);
279 
280  /* Read the next sample into x2, replacing the oldest sample of the window */
281  x2 = *(px++);
282 
283  /* Multiply-accumulate; the window is now x3 ... x7, x0 ... x2 */
284  acc0 += x3 * c0;
285  acc1 += x4 * c0;
286  acc2 += x5 * c0;
287  acc3 += x6 * c0;
288  acc4 += x7 * c0;
289  acc5 += x0 * c0;
290  acc6 += x1 * c0;
291  acc7 += x2 * c0;
292 
293  /* Read the 5th coefficient of the group */
294  c0 = *(pb++);
295 
296  /* Read the next sample into x3, replacing the oldest sample of the window */
297  x3 = *(px++);
298  /* Multiply-accumulate; the window is now x4 ... x7, x0 ... x3 */
299  acc0 += x4 * c0;
300  acc1 += x5 * c0;
301  acc2 += x6 * c0;
302  acc3 += x7 * c0;
303  acc4 += x0 * c0;
304  acc5 += x1 * c0;
305  acc6 += x2 * c0;
306  acc7 += x3 * c0;
307 
308  /* Read the 6th coefficient of the group */
309  c0 = *(pb++);
310 
311  /* Read the next sample into x4, replacing the oldest sample of the window */
312  x4 = *(px++);
313 
314  /* Multiply-accumulate; the window is now x5 ... x7, x0 ... x4 */
315  acc0 += x5 * c0;
316  acc1 += x6 * c0;
317  acc2 += x7 * c0;
318  acc3 += x0 * c0;
319  acc4 += x1 * c0;
320  acc5 += x2 * c0;
321  acc6 += x3 * c0;
322  acc7 += x4 * c0;
323 
324  /* Read the 7th coefficient of the group */
325  c0 = *(pb++);
326 
327  /* Read the next sample into x5, replacing the oldest sample of the window */
328  x5 = *(px++);
329 
330  /* Multiply-accumulate; the window is now x6, x7, x0 ... x5 */
331  acc0 += x6 * c0;
332  acc1 += x7 * c0;
333  acc2 += x0 * c0;
334  acc3 += x1 * c0;
335  acc4 += x2 * c0;
336  acc5 += x3 * c0;
337  acc6 += x4 * c0;
338  acc7 += x5 * c0;
339 
340  /* Read the 8th coefficient of the group */
341  c0 = *(pb++);
342 
343  /* Read the next sample into x6, replacing the oldest sample of the window */
344  x6 = *(px++);
345 
346  /* Multiply-accumulate; the window is now x7, x0 ... x6 */
347  acc0 += x7 * c0;
348  acc1 += x0 * c0;
349  acc2 += x1 * c0;
350  acc3 += x2 * c0;
351  acc4 += x3 * c0;
352  acc5 += x4 * c0;
353  acc6 += x5 * c0;
354  acc7 += x6 * c0;
355 
356  tapCnt--; /* x0 ... x6 now hold the first seven samples for the next group of coefficients */
357  }
358 
359  /* If the filter length is not a multiple of 8, compute the remaining filter taps */
360  tapCnt = numTaps % 0x8u;
361 
362  while(tapCnt > 0u)
363  {
364  /* Read the next coefficient */
365  c0 = *(pb++);
366 
367  /* Fetch 1 state variable to complete the eight-sample window */
368  x7 = *(px++);
369 
370  /* Perform the multiply-accumulates */
371  acc0 += x0 * c0;
372  acc1 += x1 * c0;
373  acc2 += x2 * c0;
374  acc3 += x3 * c0;
375  acc4 += x4 * c0;
376  acc5 += x5 * c0;
377  acc6 += x6 * c0;
378  acc7 += x7 * c0;
379 
380  /* Shift the window by one sample so x0 ... x6 are ready for the next coefficient */
381  x0 = x1;
382  x1 = x2;
383  x2 = x3;
384  x3 = x4;
385  x4 = x5;
386  x5 = x6;
387  x6 = x7;
388 
389  /* Decrement the loop counter */
390  tapCnt--;
391  }
392 
393  /* Advance the state pointer by 8 to process the next group of 8 samples */
394  pState = pState + 8;
395 
396  /* Store the results held in the 8 accumulators in the destination buffer. */
397  *pDst++ = acc0;
398  *pDst++ = acc1;
399  *pDst++ = acc2;
400  *pDst++ = acc3;
401  *pDst++ = acc4;
402  *pDst++ = acc5;
403  *pDst++ = acc6;
404  *pDst++ = acc7;
405 
406  blkCnt--;
407  }
408 
409  /* If the blockSize is not a multiple of 8, compute any remaining output samples here.
410  ** No loop unrolling is used. */
411  blkCnt = blockSize % 0x8u;
412 
413  while(blkCnt > 0u)
414  {
415  /* Copy one sample at a time into state buffer */
416  *pStateCurnt++ = *pSrc++;
417 
418  /* Set the accumulator to zero */
419  acc0 = 0.0f;
420 
421  /* Initialize state pointer */
422  px = pState;
423 
424  /* Initialize Coefficient pointer */
425  pb = (pCoeffs);
426 
427  i = numTaps;
428 
429  /* Dot product of the numTaps coefficients with the numTaps samples starting at pState */
430  do
431  {
432  acc0 += *px++ * *pb++;
433  i--;
434 
435  } while(i > 0u);
436 
437  /* The result is stored in the destination buffer. */
438  *pDst++ = acc0;
439 
440  /* Advance state pointer by 1 for the next sample */
441  pState = pState + 1;
442 
443  blkCnt--;
444  }
445 
446  /* Processing is complete.
447  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
448  ** This prepares the state buffer for the next function call. */
449 
450  /* Points to the start of the state buffer */
451  pStateCurnt = S->pState;
452 
453  tapCnt = (numTaps - 1u) >> 2u;
454 
455  /* Copy four samples per iteration */
456  while(tapCnt > 0u)
457  {
458  *pStateCurnt++ = *pState++;
459  *pStateCurnt++ = *pState++;
460  *pStateCurnt++ = *pState++;
461  *pStateCurnt++ = *pState++;
462 
463  /* Decrement the loop counter */
464  tapCnt--;
465  }
466 
467  /* Calculate remaining number of copies */
468  tapCnt = (numTaps - 1u) % 0x4u;
469 
470  /* Copy the remaining float32_t samples one at a time */
471  while(tapCnt > 0u)
472  {
473  *pStateCurnt++ = *pState++;
474 
475  /* Decrement the loop counter */
476  tapCnt--;
477  }
478 }
479 
480 #elif defined(ARM_MATH_CM0_FAMILY)
481 
/**
 * Floating-point FIR filter; this variant is compiled when
 * ARM_MATH_CM0_FAMILY is defined (and ARM_MATH_CM7 is not).
 *
 * Reads blockSize samples from pSrc and writes blockSize filtered samples to
 * pDst, computing one output per outer-loop iteration with no unrolling.
 *
 * @param S         Filter instance; numTaps, pCoeffs and pState are read.
 *                  On entry S->pState[0 .. numTaps-2] holds the samples kept
 *                  from the previous call. New input is appended behind them,
 *                  so the buffer must hold at least numTaps + blockSize - 1
 *                  values. On exit the newest numTaps - 1 samples have been
 *                  moved back to the start of the buffer.
 *                  pCoeffs[0] is applied to the oldest sample of each window.
 * @param pSrc      Input samples (blockSize values are read).
 * @param pDst      Output samples (blockSize values are written).
 * @param blockSize Number of samples to process.
 *
 * numTaps must be at least 1: numTaps - 1u wraps around for 0, and the inner
 * do/while always performs at least one multiply-accumulate.
 */
482 void arm_fir_f32(
483 const arm_fir_instance_f32 * S,
484 float32_t * pSrc,
485 float32_t * pDst,
486 uint32_t blockSize)
487 {
488  float32_t *pState = S->pState; /* Start of the sample window for the next output */
489  float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
490  float32_t *pStateCurnt; /* Write position for new input samples in the state buffer */
491  float32_t *px, *pb; /* Running read pointers into the state and coefficient buffers */
492  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
493  uint32_t i, tapCnt, blkCnt; /* Loop counters */
494 
495  /* Run the below code for Cortex-M0 */
496 
497  float32_t acc; /* Accumulator for the output sample being computed */
498 
499  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
500  /* pStateCurnt points to the location where the new input data should be written */
501  pStateCurnt = &(S->pState[(numTaps - 1u)]);
502 
503  /* Initialize blkCnt with blockSize */
504  blkCnt = blockSize;
505 
506  while(blkCnt > 0u)
507  {
508  /* Copy one sample at a time into state buffer */
509  *pStateCurnt++ = *pSrc++;
510 
511  /* Set the accumulator to zero */
512  acc = 0.0f;
513 
514  /* Initialize state pointer */
515  px = pState;
516 
517  /* Initialize Coefficient pointer */
518  pb = pCoeffs;
519 
520  i = numTaps;
521 
522  /* Perform the multiply-accumulates */
523  do
524  {
525  /* acc accumulates the dot product of the numTaps coefficients with the numTaps samples starting at pState */
526  acc += *px++ * *pb++;
527  i--;
528 
529  } while(i > 0u);
530 
531  /* The result is stored in the destination buffer. */
532  *pDst++ = acc;
533 
534  /* Advance state pointer by 1 for the next sample */
535  pState = pState + 1;
536 
537  blkCnt--;
538  }
539 
540  /* Processing is complete.
541  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
542  ** This prepares the state buffer for the next function call. */
543 
544  /* Points to the start of the state buffer */
545  pStateCurnt = S->pState;
546 
547  /* Copy (numTaps - 1) values */
548  tapCnt = numTaps - 1u;
549 
550  /* Copy data */
551  while(tapCnt > 0u)
552  {
553  *pStateCurnt++ = *pState++;
554 
555  /* Decrement the loop counter */
556  tapCnt--;
557  }
558 
559 }
560 
561 #else
562 
563 /* Run the below code for Cortex-M4 and Cortex-M3 */
564 
/**
 * Floating-point FIR filter; this is the default variant, compiled when
 * neither ARM_MATH_CM7 nor ARM_MATH_CM0_FAMILY is defined (Cortex-M4/M3).
 *
 * Reads blockSize samples from pSrc and writes blockSize filtered samples to
 * pDst. Outputs are produced eight at a time with the tap loop unrolled by
 * eight; the remaining blockSize % 8 outputs are computed one at a time.
 * In the unrolled loops each product is first stored in p0 ... p7 and added
 * to its accumulator only after the next coefficient and sample loads.
 * NOTE(review): presumably this split helps instruction scheduling on
 * Cortex-M4/M3 - confirm against the CMSIS-DSP release notes.
 *
 * @param S         Filter instance; numTaps, pCoeffs and pState are read.
 *                  On entry S->pState[0 .. numTaps-2] holds the samples kept
 *                  from the previous call. New input is appended behind them,
 *                  so the buffer must hold at least numTaps + blockSize - 1
 *                  values. On exit the newest numTaps - 1 samples have been
 *                  moved back to the start of the buffer.
 *                  pCoeffs[0] is applied to the oldest sample of each window.
 * @param pSrc      Input samples (blockSize values are read).
 * @param pDst      Output samples (blockSize values are written).
 * @param blockSize Number of samples to process.
 *
 * numTaps must be at least 1: numTaps - 1u wraps around for 0, and the scalar
 * tail loop always performs at least one multiply-accumulate.
 */
565 void arm_fir_f32(
566 const arm_fir_instance_f32 * S,
567 float32_t * pSrc,
568 float32_t * pDst,
569 uint32_t blockSize)
570 {
571  float32_t *pState = S->pState; /* Start of the sample window for the next output */
572  float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
573  float32_t *pStateCurnt; /* Write position for new input samples in the state buffer */
574  float32_t *px, *pb; /* Running read pointers into the state and coefficient buffers */
575  float32_t acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7; /* One accumulator per output of a group of eight */
576  float32_t x0, x1, x2, x3, x4, x5, x6, x7, c0; /* Eight-sample sliding window and the current coefficient */
577  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
578  uint32_t i, tapCnt, blkCnt; /* Loop counters */
579  float32_t p0,p1,p2,p3,p4,p5,p6,p7; /* Products waiting to be added to acc0 ... acc7 */
580 
581  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
582  /* pStateCurnt points to the location where the new input data should be written */
583  pStateCurnt = &(S->pState[(numTaps - 1u)]);
584 
585  /* Apply loop unrolling and compute 8 output values simultaneously.
586  * The variables acc0 ... acc7 hold output values that are being computed
587  * (acc4 ... acc7 follow the same pattern, each shifted by one more sample):
588  * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
589  * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
590  * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
591  * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
592  */
593  blkCnt = blockSize >> 3;
594 
595  /* First part of the processing with loop unrolling. Compute 8 outputs at a time.
596  ** A second loop below computes the remaining 1 to 7 samples. */
597  while(blkCnt > 0u)
598  {
599  /* Copy the first four of the eight new input samples into the state buffer */
600  *pStateCurnt++ = *pSrc++;
601  *pStateCurnt++ = *pSrc++;
602  *pStateCurnt++ = *pSrc++;
603  *pStateCurnt++ = *pSrc++;
604 
605  /* Set all accumulators to zero */
606  acc0 = 0.0f;
607  acc1 = 0.0f;
608  acc2 = 0.0f;
609  acc3 = 0.0f;
610  acc4 = 0.0f;
611  acc5 = 0.0f;
612  acc6 = 0.0f;
613  acc7 = 0.0f;
614 
615  /* Initialize state pointer */
616  px = pState;
617 
618  /* Initialize coeff pointer */
619  pb = (pCoeffs);
620 
621  /* Copy the other four new input samples. This is separated from the
622  * first four to avoid a call to __aeabi_memmove which would be slower
623  */
624  *pStateCurnt++ = *pSrc++;
625  *pStateCurnt++ = *pSrc++;
626  *pStateCurnt++ = *pSrc++;
627  *pStateCurnt++ = *pSrc++;
628 
629  /* Read the first seven samples of the window; the eighth (x7) is fetched inside the tap loops */
630  x0 = *px++;
631  x1 = *px++;
632  x2 = *px++;
633  x3 = *px++;
634  x4 = *px++;
635  x5 = *px++;
636  x6 = *px++;
637 
638  /* Loop unrolling. Process 8 taps at a time. */
639  tapCnt = numTaps >> 3u;
640 
641  /* Loop over the number of taps. Unroll by a factor of 8.
642  ** Each pass consumes 8 coefficients; the loop runs numTaps / 8 times. */
643  while(tapCnt > 0u)
644  {
645  /* Read the 1st coefficient of this group of eight */
646  c0 = *(pb++);
647 
648  /* Read the eighth sample of the window; x0 ... x6 are already loaded */
649  x7 = *(px++);
650 
651  /* Product k is c0 times the sample k places into the window: p0 (for acc0) uses x0 */
652  p0 = x0 * c0;
653 
654  /* p1 (for acc1) uses x1 */
655  p1 = x1 * c0;
656 
657  /* p2 (for acc2) uses x2 */
658  p2 = x2 * c0;
659 
660  /* p3 (for acc3) uses x3 */
661  p3 = x3 * c0;
662 
663  /* p4 (for acc4) uses x4 */
664  p4 = x4 * c0;
665 
666  /* p5 (for acc5) uses x5 */
667  p5 = x5 * c0;
668 
669  /* p6 (for acc6) uses x6 */
670  p6 = x6 * c0;
671 
672  /* p7 (for acc7) uses x7 */
673  p7 = x7 * c0;
674 
675  /* Read the 2nd coefficient of the group */
676  c0 = *(pb++);
677 
678  /* Read the next sample into x0 (its old value is already in p0), then add the pending products */
679  x0 = *(px++);
680 
681  acc0 += p0;
682  acc1 += p1;
683  acc2 += p2;
684  acc3 += p3;
685  acc4 += p4;
686  acc5 += p5;
687  acc6 += p6;
688  acc7 += p7;
689 
690 
691  /* Form the products with the window advanced by one sample (x1 ... x7, x0) */
692  p0 = x1 * c0;
693  p1 = x2 * c0;
694  p2 = x3 * c0;
695  p3 = x4 * c0;
696  p4 = x5 * c0;
697  p5 = x6 * c0;
698  p6 = x7 * c0;
699  p7 = x0 * c0;
700 
701  /* Read the 3rd coefficient of the group */
702  c0 = *(pb++);
703 
704  /* Read the next sample into x1, replacing the oldest sample, then add the pending products */
705  x1 = *(px++);
706 
707  acc0 += p0;
708  acc1 += p1;
709  acc2 += p2;
710  acc3 += p3;
711  acc4 += p4;
712  acc5 += p5;
713  acc6 += p6;
714  acc7 += p7;
715 
716  /* Form the products; the window is now x2 ... x7, x0, x1 */
717  p0 = x2 * c0;
718  p1 = x3 * c0;
719  p2 = x4 * c0;
720  p3 = x5 * c0;
721  p4 = x6 * c0;
722  p5 = x7 * c0;
723  p6 = x0 * c0;
724  p7 = x1 * c0;
725 
726  /* Read the 4th coefficient of the group */
727  c0 = *(pb++);
728 
729  /* Read the next sample into x2, replacing the oldest sample, then add the pending products */
730  x2 = *(px++);
731 
732  acc0 += p0;
733  acc1 += p1;
734  acc2 += p2;
735  acc3 += p3;
736  acc4 += p4;
737  acc5 += p5;
738  acc6 += p6;
739  acc7 += p7;
740 
741  /* Form the products; the window is now x3 ... x7, x0 ... x2 */
742  p0 = x3 * c0;
743  p1 = x4 * c0;
744  p2 = x5 * c0;
745  p3 = x6 * c0;
746  p4 = x7 * c0;
747  p5 = x0 * c0;
748  p6 = x1 * c0;
749  p7 = x2 * c0;
750 
751  /* Read the 5th coefficient of the group */
752  c0 = *(pb++);
753 
754  /* Read the next sample into x3, replacing the oldest sample, then add the pending products */
755  x3 = *(px++);
756 
757  acc0 += p0;
758  acc1 += p1;
759  acc2 += p2;
760  acc3 += p3;
761  acc4 += p4;
762  acc5 += p5;
763  acc6 += p6;
764  acc7 += p7;
765 
766  /* Form the products; the window is now x4 ... x7, x0 ... x3 */
767  p0 = x4 * c0;
768  p1 = x5 * c0;
769  p2 = x6 * c0;
770  p3 = x7 * c0;
771  p4 = x0 * c0;
772  p5 = x1 * c0;
773  p6 = x2 * c0;
774  p7 = x3 * c0;
775 
776  /* Read the 6th coefficient of the group */
777  c0 = *(pb++);
778 
779  /* Read the next sample into x4, replacing the oldest sample, then add the pending products */
780  x4 = *(px++);
781 
782  acc0 += p0;
783  acc1 += p1;
784  acc2 += p2;
785  acc3 += p3;
786  acc4 += p4;
787  acc5 += p5;
788  acc6 += p6;
789  acc7 += p7;
790 
791  /* Form the products; the window is now x5 ... x7, x0 ... x4 */
792  p0 = x5 * c0;
793  p1 = x6 * c0;
794  p2 = x7 * c0;
795  p3 = x0 * c0;
796  p4 = x1 * c0;
797  p5 = x2 * c0;
798  p6 = x3 * c0;
799  p7 = x4 * c0;
800 
801  /* Read the 7th coefficient of the group */
802  c0 = *(pb++);
803 
804  /* Read the next sample into x5, replacing the oldest sample, then add the pending products */
805  x5 = *(px++);
806 
807  acc0 += p0;
808  acc1 += p1;
809  acc2 += p2;
810  acc3 += p3;
811  acc4 += p4;
812  acc5 += p5;
813  acc6 += p6;
814  acc7 += p7;
815 
816  /* Form the products; the window is now x6, x7, x0 ... x5 */
817  p0 = x6 * c0;
818  p1 = x7 * c0;
819  p2 = x0 * c0;
820  p3 = x1 * c0;
821  p4 = x2 * c0;
822  p5 = x3 * c0;
823  p6 = x4 * c0;
824  p7 = x5 * c0;
825 
826  /* Read the 8th coefficient of the group */
827  c0 = *(pb++);
828 
829  /* Read the next sample into x6, replacing the oldest sample, then add the pending products */
830  x6 = *(px++);
831 
832  acc0 += p0;
833  acc1 += p1;
834  acc2 += p2;
835  acc3 += p3;
836  acc4 += p4;
837  acc5 += p5;
838  acc6 += p6;
839  acc7 += p7;
840 
841  /* Form the products; the window is now x7, x0 ... x6 */
842  p0 = x7 * c0;
843  p1 = x0 * c0;
844  p2 = x1 * c0;
845  p3 = x2 * c0;
846  p4 = x3 * c0;
847  p5 = x4 * c0;
848  p6 = x5 * c0;
849  p7 = x6 * c0;
850 
851  tapCnt--; /* The last products of the group are added below; x0 ... x6 then feed the next group */
852 
853  acc0 += p0;
854  acc1 += p1;
855  acc2 += p2;
856  acc3 += p3;
857  acc4 += p4;
858  acc5 += p5;
859  acc6 += p6;
860  acc7 += p7;
861  }
862 
863  /* If the filter length is not a multiple of 8, compute the remaining filter taps */
864  tapCnt = numTaps % 0x8u;
865 
866  while(tapCnt > 0u)
867  {
868  /* Read the next coefficient */
869  c0 = *(pb++);
870 
871  /* Fetch 1 state variable to complete the eight-sample window */
872  x7 = *(px++);
873 
874  /* Form the eight products for this coefficient */
875  p0 = x0 * c0;
876  p1 = x1 * c0;
877  p2 = x2 * c0;
878  p3 = x3 * c0;
879  p4 = x4 * c0;
880  p5 = x5 * c0;
881  p6 = x6 * c0;
882  p7 = x7 * c0;
883 
884  /* Shift the window by one sample so x0 ... x6 are ready for the next coefficient */
885  x0 = x1;
886  x1 = x2;
887  x2 = x3;
888  x3 = x4;
889  x4 = x5;
890  x5 = x6;
891  x6 = x7;
892 
893  acc0 += p0;
894  acc1 += p1;
895  acc2 += p2;
896  acc3 += p3;
897  acc4 += p4;
898  acc5 += p5;
899  acc6 += p6;
900  acc7 += p7;
901 
902  /* Decrement the loop counter */
903  tapCnt--;
904  }
905 
906  /* Advance the state pointer by 8 to process the next group of 8 samples */
907  pState = pState + 8;
908 
909  /* Store the results held in the 8 accumulators in the destination buffer. */
910  *pDst++ = acc0;
911  *pDst++ = acc1;
912  *pDst++ = acc2;
913  *pDst++ = acc3;
914  *pDst++ = acc4;
915  *pDst++ = acc5;
916  *pDst++ = acc6;
917  *pDst++ = acc7;
918 
919  blkCnt--;
920  }
921 
922  /* If the blockSize is not a multiple of 8, compute any remaining output samples here.
923  ** No loop unrolling is used. */
924  blkCnt = blockSize % 0x8u;
925 
926  while(blkCnt > 0u)
927  {
928  /* Copy one sample at a time into state buffer */
929  *pStateCurnt++ = *pSrc++;
930 
931  /* Set the accumulator to zero */
932  acc0 = 0.0f;
933 
934  /* Initialize state pointer */
935  px = pState;
936 
937  /* Initialize Coefficient pointer */
938  pb = (pCoeffs);
939 
940  i = numTaps;
941 
942  /* Dot product of the numTaps coefficients with the numTaps samples starting at pState */
943  do
944  {
945  acc0 += *px++ * *pb++;
946  i--;
947 
948  } while(i > 0u);
949 
950  /* The result is stored in the destination buffer. */
951  *pDst++ = acc0;
952 
953  /* Advance state pointer by 1 for the next sample */
954  pState = pState + 1;
955 
956  blkCnt--;
957  }
958 
959  /* Processing is complete.
960  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
961  ** This prepares the state buffer for the next function call. */
962 
963  /* Points to the start of the state buffer */
964  pStateCurnt = S->pState;
965 
966  tapCnt = (numTaps - 1u) >> 2u;
967 
968  /* Copy four samples per iteration */
969  while(tapCnt > 0u)
970  {
971  *pStateCurnt++ = *pState++;
972  *pStateCurnt++ = *pState++;
973  *pStateCurnt++ = *pState++;
974  *pStateCurnt++ = *pState++;
975 
976  /* Decrement the loop counter */
977  tapCnt--;
978  }
979 
980  /* Calculate remaining number of copies */
981  tapCnt = (numTaps - 1u) % 0x4u;
982 
983  /* Copy the remaining float32_t samples one at a time */
984  while(tapCnt > 0u)
985  {
986  *pStateCurnt++ = *pState++;
987 
988  /* Decrement the loop counter */
989  tapCnt--;
990  }
991 }
992 
993 #endif
994 
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
Instance structure for the floating-point FIR filter.
Definition: arm_math.h:1059
float32_t * pCoeffs
Definition: arm_math.h:1063
void arm_fir_f32(const arm_fir_instance_f32 *S, float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
Processing function for the floating-point FIR filter.
Definition: arm_fir_f32.c:565
float32_t * pState
Definition: arm_math.h:1062