STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_decimate_q15.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_q15.c
9 *
10 * Description: Q15 FIR Decimator.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
73 #ifndef ARM_MATH_CM0_FAMILY
74 
75 #ifndef UNALIGNED_SUPPORT_DISABLE
76 
79  q15_t * pSrc,
80  q15_t * pDst,
81  uint32_t blockSize)
82 {
83  q15_t *pState = S->pState; /* State pointer */
84  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
85  q15_t *pStateCurnt; /* Points to the current sample of the state */
86  q15_t *px; /* Temporary pointer for state buffer */
87  q15_t *pb; /* Temporary pointer coefficient buffer */
88  q31_t x0, x1, c0, c1; /* Temporary variables to hold state and coefficient values */
89  q63_t sum0; /* Accumulators */
90  q63_t acc0, acc1;
91  q15_t *px0, *px1;
92  uint32_t blkCntN3;
93  uint32_t numTaps = S->numTaps; /* Number of taps */
94  uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
95 
96 
97  /* S->pState buffer contains previous frame (numTaps - 1) samples */
98  /* pStateCurnt points to the location where the new input data should be written */
99  pStateCurnt = S->pState + (numTaps - 1u);
100 
101 
102  /* Output samples are computed two at a time: blkCnt counts the pairs, blkCntN3 the leftover sample (0 or 1) */
103  blkCnt = outBlockSize / 2;
104  blkCntN3 = outBlockSize - (2 * blkCnt);
105 
106 
107  while(blkCnt > 0u)
108  {
109  /* Copy 2 * M new input samples (enough for two output samples) into the state buffer */
110  i = 2 * S->M;
111 
112  do
113  {
114  *pStateCurnt++ = *pSrc++;
115 
116  } while(--i);
117 
118  /* Set accumulator to zero */
119  acc0 = 0;
120  acc1 = 0;
121 
122  /* Initialize state pointer */
123  px0 = pState;
124 
125  px1 = pState + S->M;
126 
127 
128  /* Initialize coeff pointer */
129  pb = pCoeffs;
130 
131  /* Loop unrolling. Process 4 taps at a time. */
132  tapCnt = numTaps >> 2;
133 
134  /* Loop over the number of taps. Unroll by a factor of 4.
135  ** Repeat until we've computed numTaps-4 coefficients. */
136  while(tapCnt > 0u)
137  {
138  /* Read the b[numTaps-1] and b[numTaps-2] coefficients */
139  c0 = *__SIMD32(pb)++;
140 
141  /* Read x[n-numTaps-1] and x[n-numTaps-2] samples */
142  x0 = *__SIMD32(px0)++;
143 
144  x1 = *__SIMD32(px1)++;
145 
146  /* Perform the multiply-accumulate */
147  acc0 = __SMLALD(x0, c0, acc0);
148 
149  acc1 = __SMLALD(x1, c0, acc1);
150 
151  /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
152  c0 = *__SIMD32(pb)++;
153 
154  /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
155  x0 = *__SIMD32(px0)++;
156 
157  x1 = *__SIMD32(px1)++;
158 
159  /* Perform the multiply-accumulate */
160  acc0 = __SMLALD(x0, c0, acc0);
161 
162  acc1 = __SMLALD(x1, c0, acc1);
163 
164  /* Decrement the loop counter */
165  tapCnt--;
166  }
167 
168  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
169  tapCnt = numTaps % 0x4u;
170 
171  while(tapCnt > 0u)
172  {
173  /* Read coefficients */
174  c0 = *pb++;
175 
176  /* Fetch 1 state variable */
177  x0 = *px0++;
178 
179  x1 = *px1++;
180 
181  /* Perform the multiply-accumulate */
182  acc0 = __SMLALD(x0, c0, acc0);
183  acc1 = __SMLALD(x1, c0, acc1);
184 
185  /* Decrement the loop counter */
186  tapCnt--;
187  }
188 
189  /* Advance the state pointer by twice the decimation factor
190  * (two outputs were computed) to process the next group of samples */
191  pState = pState + S->M * 2;
192 
193  /* Store filter output: shift the 64-bit accumulator right by 15 bits */
194  /* and saturate to 16 bits to get the output in 1.15 format */
195  *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
196  *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
197 
198  /* Decrement the loop counter */
199  blkCnt--;
200  }
201 
202 
203 
204  while(blkCntN3 > 0u)
205  {
206  /* Copy decimation factor number of new input samples into the state buffer */
207  i = S->M;
208 
209  do
210  {
211  *pStateCurnt++ = *pSrc++;
212 
213  } while(--i);
214 
215  /*Set sum to zero */
216  sum0 = 0;
217 
218  /* Initialize state pointer */
219  px = pState;
220 
221  /* Initialize coeff pointer */
222  pb = pCoeffs;
223 
224  /* Loop unrolling. Process 4 taps at a time. */
225  tapCnt = numTaps >> 2;
226 
227  /* Loop over the number of taps. Unroll by a factor of 4.
228  ** Repeat until we've computed numTaps-4 coefficients. */
229  while(tapCnt > 0u)
230  {
231  /* Read the b[numTaps-1] and b[numTaps-2] coefficients */
232  c0 = *__SIMD32(pb)++;
233 
234  /* Read x[n-numTaps-1] and x[n-numTaps-2] samples */
235  x0 = *__SIMD32(px)++;
236 
237  /* Read the b[numTaps-3] and b[numTaps-4] coefficient */
238  c1 = *__SIMD32(pb)++;
239 
240  /* Perform the multiply-accumulate */
241  sum0 = __SMLALD(x0, c0, sum0);
242 
243  /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */
244  x0 = *__SIMD32(px)++;
245 
246  /* Perform the multiply-accumulate */
247  sum0 = __SMLALD(x0, c1, sum0);
248 
249  /* Decrement the loop counter */
250  tapCnt--;
251  }
252 
253  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
254  tapCnt = numTaps % 0x4u;
255 
256  while(tapCnt > 0u)
257  {
258  /* Read coefficients */
259  c0 = *pb++;
260 
261  /* Fetch 1 state variable */
262  x0 = *px++;
263 
264  /* Perform the multiply-accumulate */
265  sum0 = __SMLALD(x0, c0, sum0);
266 
267  /* Decrement the loop counter */
268  tapCnt--;
269  }
270 
271  /* Advance the state pointer by the decimation factor
272  * to process the next group of decimation factor number samples */
273  pState = pState + S->M;
274 
275  /* Store filter output: shift the 64-bit accumulator right by 15 bits */
276  /* and saturate to 16 bits to get the output in 1.15 format */
277  *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
278 
279  /* Decrement the loop counter */
280  blkCntN3--;
281  }
282 
283  /* Processing is complete.
284  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
285  ** This prepares the state buffer for the next function call. */
286 
287  /* Points to the start of the state buffer */
288  pStateCurnt = S->pState;
289 
290  i = (numTaps - 1u) >> 2u;
291 
292  /* copy data */
293  while(i > 0u)
294  {
295  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
296  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
297 
298  /* Decrement the loop counter */
299  i--;
300  }
301 
302  i = (numTaps - 1u) % 0x04u;
303 
304  /* copy data */
305  while(i > 0u)
306  {
307  *pStateCurnt++ = *pState++;
308 
309  /* Decrement the loop counter */
310  i--;
311  }
312 }
313 
314 #else
315 
316 
319  q15_t * pSrc,
320  q15_t * pDst,
321  uint32_t blockSize)
322 {
323  q15_t *pState = S->pState; /* State pointer */
324  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
325  q15_t *pStateCurnt; /* Points to the current sample of the state */
326  q15_t *px; /* Temporary pointer for state buffer */
327  q15_t *pb; /* Temporary pointer coefficient buffer */
328  q15_t x0, x1, c0; /* Temporary variables to hold state and coefficient values */
329  q63_t sum0; /* Accumulators */
330  q63_t acc0, acc1;
331  q15_t *px0, *px1;
332  uint32_t blkCntN3;
333  uint32_t numTaps = S->numTaps; /* Number of taps */
334  uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
335 
336 
337  /* S->pState buffer contains previous frame (numTaps - 1) samples */
338  /* pStateCurnt points to the location where the new input data should be written */
339  pStateCurnt = S->pState + (numTaps - 1u);
340 
341 
342  /* Output samples are computed two at a time: blkCnt counts the pairs, blkCntN3 the leftover sample (0 or 1) */
343  blkCnt = outBlockSize / 2;
344  blkCntN3 = outBlockSize - (2 * blkCnt);
345 
346  while(blkCnt > 0u)
347  {
348  /* Copy 2 * M new input samples (enough for two output samples) into the state buffer */
349  i = 2 * S->M;
350 
351  do
352  {
353  *pStateCurnt++ = *pSrc++;
354 
355  } while(--i);
356 
357  /* Set accumulator to zero */
358  acc0 = 0;
359  acc1 = 0;
360 
361  /* Initialize state pointer */
362  px0 = pState;
363 
364  px1 = pState + S->M;
365 
366 
367  /* Initialize coeff pointer */
368  pb = pCoeffs;
369 
370  /* Loop unrolling. Process 4 taps at a time. */
371  tapCnt = numTaps >> 2;
372 
373  /* Loop over the number of taps. Unroll by a factor of 4.
374  ** Repeat until we've computed numTaps-4 coefficients. */
375  while(tapCnt > 0u)
376  {
377  /* Read the b[numTaps-1] coefficient */
378  c0 = *pb++;
379 
380  /* Read x[n-numTaps-1] for sample 0 and for sample 1 */
381  x0 = *px0++;
382  x1 = *px1++;
383 
384  /* Perform the multiply-accumulate */
385  acc0 += x0 * c0;
386  acc1 += x1 * c0;
387 
388  /* Read the b[numTaps-2] coefficient */
389  c0 = *pb++;
390 
391  /* Read x[n-numTaps-2] for sample 0 and sample 1 */
392  x0 = *px0++;
393  x1 = *px1++;
394 
395  /* Perform the multiply-accumulate */
396  acc0 += x0 * c0;
397  acc1 += x1 * c0;
398 
399  /* Read the b[numTaps-3] coefficients */
400  c0 = *pb++;
401 
402  /* Read x[n-numTaps-3] for sample 0 and sample 1 */
403  x0 = *px0++;
404  x1 = *px1++;
405 
406  /* Perform the multiply-accumulate */
407  acc0 += x0 * c0;
408  acc1 += x1 * c0;
409 
410  /* Read the b[numTaps-4] coefficient */
411  c0 = *pb++;
412 
413  /* Read x[n-numTaps-4] for sample 0 and sample 1 */
414  x0 = *px0++;
415  x1 = *px1++;
416 
417  /* Perform the multiply-accumulate */
418  acc0 += x0 * c0;
419  acc1 += x1 * c0;
420 
421  /* Decrement the loop counter */
422  tapCnt--;
423  }
424 
425  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
426  tapCnt = numTaps % 0x4u;
427 
428  while(tapCnt > 0u)
429  {
430  /* Read coefficients */
431  c0 = *pb++;
432 
433  /* Fetch 1 state variable */
434  x0 = *px0++;
435  x1 = *px1++;
436 
437  /* Perform the multiply-accumulate */
438  acc0 += x0 * c0;
439  acc1 += x1 * c0;
440 
441  /* Decrement the loop counter */
442  tapCnt--;
443  }
444 
445  /* Advance the state pointer by twice the decimation factor
446  * (two outputs were computed) to process the next group of samples */
447  pState = pState + S->M * 2;
448 
449  /* Store filter output: shift the 64-bit accumulator right by 15 bits */
450  /* and saturate to 16 bits to get the output in 1.15 format */
451 
452  *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
453  *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
454 
455  /* Decrement the loop counter */
456  blkCnt--;
457  }
458 
459  while(blkCntN3 > 0u)
460  {
461  /* Copy decimation factor number of new input samples into the state buffer */
462  i = S->M;
463 
464  do
465  {
466  *pStateCurnt++ = *pSrc++;
467 
468  } while(--i);
469 
470  /*Set sum to zero */
471  sum0 = 0;
472 
473  /* Initialize state pointer */
474  px = pState;
475 
476  /* Initialize coeff pointer */
477  pb = pCoeffs;
478 
479  /* Loop unrolling. Process 4 taps at a time. */
480  tapCnt = numTaps >> 2;
481 
482  /* Loop over the number of taps. Unroll by a factor of 4.
483  ** Repeat until we've computed numTaps-4 coefficients. */
484  while(tapCnt > 0u)
485  {
486  /* Read the b[numTaps-1] coefficient */
487  c0 = *pb++;
488 
489  /* Read x[n-numTaps-1] sample */
490  x0 = *px++;
491 
492  /* Perform the multiply-accumulate */
493  sum0 += x0 * c0;
494 
495  /* Read the b[numTaps-2] coefficient */
496  c0 = *pb++;
497 
498  /* Read x[n-numTaps-2] sample */
499  x0 = *px++;
500 
501  /* Perform the multiply-accumulate */
502  sum0 += x0 * c0;
503 
504  /* Read the b[numTaps-3] coefficients */
505  c0 = *pb++;
506 
507  /* Read x[n-numTaps-3] sample */
508  x0 = *px++;
509 
510  /* Perform the multiply-accumulate */
511  sum0 += x0 * c0;
512 
513  /* Read the b[numTaps-4] coefficient */
514  c0 = *pb++;
515 
516  /* Read x[n-numTaps-4] sample */
517  x0 = *px++;
518 
519  /* Perform the multiply-accumulate */
520  sum0 += x0 * c0;
521 
522  /* Decrement the loop counter */
523  tapCnt--;
524  }
525 
526  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
527  tapCnt = numTaps % 0x4u;
528 
529  while(tapCnt > 0u)
530  {
531  /* Read coefficients */
532  c0 = *pb++;
533 
534  /* Fetch 1 state variable */
535  x0 = *px++;
536 
537  /* Perform the multiply-accumulate */
538  sum0 += x0 * c0;
539 
540  /* Decrement the loop counter */
541  tapCnt--;
542  }
543 
544  /* Advance the state pointer by the decimation factor
545  * to process the next group of decimation factor number samples */
546  pState = pState + S->M;
547 
548  /* Store filter output: shift the 64-bit accumulator right by 15 bits */
549  /* and saturate to 16 bits to get the output in 1.15 format */
550  *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
551 
552  /* Decrement the loop counter */
553  blkCntN3--;
554  }
555 
556  /* Processing is complete.
557  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
558  ** This prepares the state buffer for the next function call. */
559 
560  /* Points to the start of the state buffer */
561  pStateCurnt = S->pState;
562 
563  i = (numTaps - 1u) >> 2u;
564 
565  /* copy data */
566  while(i > 0u)
567  {
568  *pStateCurnt++ = *pState++;
569  *pStateCurnt++ = *pState++;
570  *pStateCurnt++ = *pState++;
571  *pStateCurnt++ = *pState++;
572 
573  /* Decrement the loop counter */
574  i--;
575  }
576 
577  i = (numTaps - 1u) % 0x04u;
578 
579  /* copy data */
580  while(i > 0u)
581  {
582  *pStateCurnt++ = *pState++;
583 
584  /* Decrement the loop counter */
585  i--;
586  }
587 }
588 
589 
590 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
591 
592 #else
593 
594 
597  q15_t * pSrc,
598  q15_t * pDst,
599  uint32_t blockSize)
600 {
601  q15_t *pState = S->pState; /* State pointer */
602  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
603  q15_t *pStateCurnt; /* Points to the current sample of the state */
604  q15_t *px; /* Temporary pointer for state buffer */
605  q15_t *pb; /* Temporary pointer coefficient buffer */
606  q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
607  q63_t sum0; /* Accumulators */
608  uint32_t numTaps = S->numTaps; /* Number of taps */
609  uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */
610 
611 
612 
613 /* Run the below code for Cortex-M0 */
614 
615  /* S->pState buffer contains previous frame (numTaps - 1) samples */
616  /* pStateCurnt points to the location where the new input data should be written */
617  pStateCurnt = S->pState + (numTaps - 1u);
618 
619  /* Total number of output samples to be computed */
620  blkCnt = outBlockSize;
621 
622  while(blkCnt > 0u)
623  {
624  /* Copy decimation factor number of new input samples into the state buffer */
625  i = S->M;
626 
627  do
628  {
629  *pStateCurnt++ = *pSrc++;
630 
631  } while(--i);
632 
633  /*Set sum to zero */
634  sum0 = 0;
635 
636  /* Initialize state pointer */
637  px = pState;
638 
639  /* Initialize coeff pointer */
640  pb = pCoeffs;
641 
642  tapCnt = numTaps;
643 
644  while(tapCnt > 0u)
645  {
646  /* Read coefficients */
647  c0 = *pb++;
648 
649  /* Fetch 1 state variable */
650  x0 = *px++;
651 
652  /* Perform the multiply-accumulate */
653  sum0 += (q31_t) x0 *c0;
654 
655  /* Decrement the loop counter */
656  tapCnt--;
657  }
658 
659  /* Advance the state pointer by the decimation factor
660  * to process the next group of decimation factor number samples */
661  pState = pState + S->M;
662 
663  /* Store filter output: shift the 64-bit accumulator right by 15 bits */
664  /* and saturate to 16 bits to get the output in 1.15 format */
665  *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
666 
667  /* Decrement the loop counter */
668  blkCnt--;
669  }
670 
671  /* Processing is complete.
672  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
673  ** This prepares the state buffer for the next function call. */
674 
675  /* Points to the start of the state buffer */
676  pStateCurnt = S->pState;
677 
678  i = numTaps - 1u;
679 
680  /* copy data */
681  while(i > 0u)
682  {
683  *pStateCurnt++ = *pState++;
684 
685  /* Decrement the loop counter */
686  i--;
687  }
688 
689 
690 }
691 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
692 
693 
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
Instance structure for the Q15 FIR decimator.
Definition: arm_math.h:3269
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397
void arm_fir_decimate_q15(const arm_fir_decimate_instance_q15 *S, q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Processing function for the Q15 FIR decimator.