STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_q15.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_q15.c
9 *
10 * Description: Q15 FIR filter processing function.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
78 #ifndef ARM_MATH_CM0_FAMILY
79 
80 /* Run the below code for Cortex-M4 and Cortex-M3 */
81 
82 #ifndef UNALIGNED_SUPPORT_DISABLE
83 
84 
86  const arm_fir_instance_q15 * S,
87  q15_t * pSrc,
88  q15_t * pDst,
89  uint32_t blockSize)
90 {
91  q15_t *pState = S->pState; /* State pointer */
92  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
93  q15_t *pStateCurnt; /* Points to the current sample of the state */
94  q15_t *px1; /* Temporary q15 pointer for state buffer */
95  q15_t *pb; /* Temporary pointer for coefficient buffer */
96  q31_t x0, x1, x2, x3, c0; /* Temporary variables to hold SIMD state and coefficient values */
97  q63_t acc0, acc1, acc2, acc3; /* Accumulators */
98  uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
99  uint32_t tapCnt, blkCnt; /* Loop counters */
100 
101 
102  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
103  /* pStateCurnt points to the location where the new input data should be written */
104  pStateCurnt = &(S->pState[(numTaps - 1u)]);
105 
106  /* Apply loop unrolling and compute 4 output values simultaneously.
107  * The variables acc0 ... acc3 hold output values that are being computed:
108  *
109  * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
110  * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
111  * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
112  * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
113  */
114 
115  blkCnt = blockSize >> 2;
116 
117  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
118  ** a second loop below computes the remaining 1 to 3 samples. */
119  while(blkCnt > 0u)
120  {
121  /* Copy four new input samples into the state buffer.
122  ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */
123  *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
124  *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
125 
126  /* Set all accumulators to zero */
127  acc0 = 0;
128  acc1 = 0;
129  acc2 = 0;
130  acc3 = 0;
131 
132  /* Initialize state pointer of type q15 */
133  px1 = pState;
134 
135  /* Initialize coeff pointer of type q31 */
136  pb = pCoeffs;
137 
138  /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
139  x0 = _SIMD32_OFFSET(px1);
140 
141  /* Read the third and forth samples from the state buffer: x[n-N-1], x[n-N-2] */
142  x1 = _SIMD32_OFFSET(px1 + 1u);
143 
144  px1 += 2u;
145 
146  /* Loop over the number of taps. Unroll by a factor of 4.
147  ** Repeat until we've computed numTaps-4 coefficients. */
148  tapCnt = numTaps >> 2;
149 
150  while(tapCnt > 0u)
151  {
152  /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
153  c0 = *__SIMD32(pb)++;
154 
155  /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
156  acc0 = __SMLALD(x0, c0, acc0);
157 
158  /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
159  acc1 = __SMLALD(x1, c0, acc1);
160 
161  /* Read state x[n-N-2], x[n-N-3] */
162  x2 = _SIMD32_OFFSET(px1);
163 
164  /* Read state x[n-N-3], x[n-N-4] */
165  x3 = _SIMD32_OFFSET(px1 + 1u);
166 
167  /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
168  acc2 = __SMLALD(x2, c0, acc2);
169 
170  /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
171  acc3 = __SMLALD(x3, c0, acc3);
172 
173  /* Read coefficients b[N-2], b[N-3] */
174  c0 = *__SIMD32(pb)++;
175 
176  /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
177  acc0 = __SMLALD(x2, c0, acc0);
178 
179  /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
180  acc1 = __SMLALD(x3, c0, acc1);
181 
182  /* Read state x[n-N-4], x[n-N-5] */
183  x0 = _SIMD32_OFFSET(px1 + 2u);
184 
185  /* Read state x[n-N-5], x[n-N-6] */
186  x1 = _SIMD32_OFFSET(px1 + 3u);
187 
188  /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
189  acc2 = __SMLALD(x0, c0, acc2);
190 
191  /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
192  acc3 = __SMLALD(x1, c0, acc3);
193 
194  px1 += 4u;
195 
196  tapCnt--;
197 
198  }
199 
200 
201  /* If the filter length is not a multiple of 4, compute the remaining filter taps.
202  ** This is always be 2 taps since the filter length is even. */
203  if((numTaps & 0x3u) != 0u)
204  {
205  /* Read 2 coefficients */
206  c0 = *__SIMD32(pb)++;
207 
208  /* Fetch 4 state variables */
209  x2 = _SIMD32_OFFSET(px1);
210 
211  x3 = _SIMD32_OFFSET(px1 + 1u);
212 
213  /* Perform the multiply-accumulates */
214  acc0 = __SMLALD(x0, c0, acc0);
215 
216  px1 += 2u;
217 
218  acc1 = __SMLALD(x1, c0, acc1);
219  acc2 = __SMLALD(x2, c0, acc2);
220  acc3 = __SMLALD(x3, c0, acc3);
221  }
222 
223  /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation.
224  ** Then store the 4 outputs in the destination buffer. */
225 
226 #ifndef ARM_MATH_BIG_ENDIAN
227 
228  *__SIMD32(pDst)++ =
229  __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
230  *__SIMD32(pDst)++ =
231  __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
232 
233 #else
234 
235  *__SIMD32(pDst)++ =
236  __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
237  *__SIMD32(pDst)++ =
238  __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
239 
240 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
241 
242 
243 
244  /* Advance the state pointer by 4 to process the next group of 4 samples */
245  pState = pState + 4;
246 
247  /* Decrement the loop counter */
248  blkCnt--;
249  }
250 
251  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
252  ** No loop unrolling is used. */
253  blkCnt = blockSize % 0x4u;
254  while(blkCnt > 0u)
255  {
256  /* Copy two samples into state buffer */
257  *pStateCurnt++ = *pSrc++;
258 
259  /* Set the accumulator to zero */
260  acc0 = 0;
261 
262  /* Initialize state pointer of type q15 */
263  px1 = pState;
264 
265  /* Initialize coeff pointer of type q31 */
266  pb = pCoeffs;
267 
268  tapCnt = numTaps >> 1;
269 
270  do
271  {
272 
273  c0 = *__SIMD32(pb)++;
274  x0 = *__SIMD32(px1)++;
275 
276  acc0 = __SMLALD(x0, c0, acc0);
277  tapCnt--;
278  }
279  while(tapCnt > 0u);
280 
281  /* The result is in 2.30 format. Convert to 1.15 with saturation.
282  ** Then store the output in the destination buffer. */
283  *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
284 
285  /* Advance state pointer by 1 for the next sample */
286  pState = pState + 1;
287 
288  /* Decrement the loop counter */
289  blkCnt--;
290  }
291 
292  /* Processing is complete.
293  ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
294  ** This prepares the state buffer for the next function call. */
295 
296  /* Points to the start of the state buffer */
297  pStateCurnt = S->pState;
298 
299  /* Calculation of count for copying integer writes */
300  tapCnt = (numTaps - 1u) >> 2;
301 
302  while(tapCnt > 0u)
303  {
304 
305  /* Copy state values to start of state buffer */
306  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
307  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
308 
309  tapCnt--;
310 
311  }
312 
313  /* Calculation of count for remaining q15_t data */
314  tapCnt = (numTaps - 1u) % 0x4u;
315 
316  /* copy remaining data */
317  while(tapCnt > 0u)
318  {
319  *pStateCurnt++ = *pState++;
320 
321  /* Decrement the loop counter */
322  tapCnt--;
323  }
324 }
325 
326 #else /* UNALIGNED_SUPPORT_DISABLE */
327 
328 void arm_fir_q15(
329  const arm_fir_instance_q15 * S,
330  q15_t * pSrc,
331  q15_t * pDst,
332  uint32_t blockSize)
333 {
334  q15_t *pState = S->pState; /* State pointer */
335  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
336  q15_t *pStateCurnt; /* Points to the current sample of the state */
337  q63_t acc0, acc1, acc2, acc3; /* Accumulators */
338  q15_t *pb; /* Temporary pointer for coefficient buffer */
339  q15_t *px; /* Temporary q31 pointer for SIMD state buffer accesses */
340  q31_t x0, x1, x2, c0; /* Temporary variables to hold SIMD state and coefficient values */
341  uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
342  uint32_t tapCnt, blkCnt; /* Loop counters */
343 
344 
345  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
346  /* pStateCurnt points to the location where the new input data should be written */
347  pStateCurnt = &(S->pState[(numTaps - 1u)]);
348 
349  /* Apply loop unrolling and compute 4 output values simultaneously.
350  * The variables acc0 ... acc3 hold output values that are being computed:
351  *
352  * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
353  * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
354  * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
355  * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
356  */
357 
358  blkCnt = blockSize >> 2;
359 
360  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
361  ** a second loop below computes the remaining 1 to 3 samples. */
362  while(blkCnt > 0u)
363  {
364  /* Copy four new input samples into the state buffer.
365  ** Use 32-bit SIMD to move the 16-bit data. Only requires two copies. */
366  *pStateCurnt++ = *pSrc++;
367  *pStateCurnt++ = *pSrc++;
368  *pStateCurnt++ = *pSrc++;
369  *pStateCurnt++ = *pSrc++;
370 
371 
372  /* Set all accumulators to zero */
373  acc0 = 0;
374  acc1 = 0;
375  acc2 = 0;
376  acc3 = 0;
377 
378  /* Typecast q15_t pointer to q31_t pointer for state reading in q31_t */
379  px = pState;
380 
381  /* Typecast q15_t pointer to q31_t pointer for coefficient reading in q31_t */
382  pb = pCoeffs;
383 
384  /* Read the first two samples from the state buffer: x[n-N], x[n-N-1] */
385  x0 = *__SIMD32(px)++;
386 
387  /* Read the third and forth samples from the state buffer: x[n-N-2], x[n-N-3] */
388  x2 = *__SIMD32(px)++;
389 
390  /* Loop over the number of taps. Unroll by a factor of 4.
391  ** Repeat until we've computed numTaps-(numTaps%4) coefficients. */
392  tapCnt = numTaps >> 2;
393 
394  while(tapCnt > 0)
395  {
396  /* Read the first two coefficients using SIMD: b[N] and b[N-1] coefficients */
397  c0 = *__SIMD32(pb)++;
398 
399  /* acc0 += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
400  acc0 = __SMLALD(x0, c0, acc0);
401 
402  /* acc2 += b[N] * x[n-N-2] + b[N-1] * x[n-N-3] */
403  acc2 = __SMLALD(x2, c0, acc2);
404 
405  /* pack x[n-N-1] and x[n-N-2] */
406 #ifndef ARM_MATH_BIG_ENDIAN
407  x1 = __PKHBT(x2, x0, 0);
408 #else
409  x1 = __PKHBT(x0, x2, 0);
410 #endif
411 
412  /* Read state x[n-N-4], x[n-N-5] */
413  x0 = _SIMD32_OFFSET(px);
414 
415  /* acc1 += b[N] * x[n-N-1] + b[N-1] * x[n-N-2] */
416  acc1 = __SMLALDX(x1, c0, acc1);
417 
418  /* pack x[n-N-3] and x[n-N-4] */
419 #ifndef ARM_MATH_BIG_ENDIAN
420  x1 = __PKHBT(x0, x2, 0);
421 #else
422  x1 = __PKHBT(x2, x0, 0);
423 #endif
424 
425  /* acc3 += b[N] * x[n-N-3] + b[N-1] * x[n-N-4] */
426  acc3 = __SMLALDX(x1, c0, acc3);
427 
428  /* Read coefficients b[N-2], b[N-3] */
429  c0 = *__SIMD32(pb)++;
430 
431  /* acc0 += b[N-2] * x[n-N-2] + b[N-3] * x[n-N-3] */
432  acc0 = __SMLALD(x2, c0, acc0);
433 
434  /* Read state x[n-N-6], x[n-N-7] with offset */
435  x2 = _SIMD32_OFFSET(px + 2u);
436 
437  /* acc2 += b[N-2] * x[n-N-4] + b[N-3] * x[n-N-5] */
438  acc2 = __SMLALD(x0, c0, acc2);
439 
440  /* acc1 += b[N-2] * x[n-N-3] + b[N-3] * x[n-N-4] */
441  acc1 = __SMLALDX(x1, c0, acc1);
442 
443  /* pack x[n-N-5] and x[n-N-6] */
444 #ifndef ARM_MATH_BIG_ENDIAN
445  x1 = __PKHBT(x2, x0, 0);
446 #else
447  x1 = __PKHBT(x0, x2, 0);
448 #endif
449 
450  /* acc3 += b[N-2] * x[n-N-5] + b[N-3] * x[n-N-6] */
451  acc3 = __SMLALDX(x1, c0, acc3);
452 
453  /* Update state pointer for next state reading */
454  px += 4u;
455 
456  /* Decrement tap count */
457  tapCnt--;
458 
459  }
460 
461  /* If the filter length is not a multiple of 4, compute the remaining filter taps.
462  ** This is always be 2 taps since the filter length is even. */
463  if((numTaps & 0x3u) != 0u)
464  {
465 
466  /* Read last two coefficients */
467  c0 = *__SIMD32(pb)++;
468 
469  /* Perform the multiply-accumulates */
470  acc0 = __SMLALD(x0, c0, acc0);
471  acc2 = __SMLALD(x2, c0, acc2);
472 
473  /* pack state variables */
474 #ifndef ARM_MATH_BIG_ENDIAN
475  x1 = __PKHBT(x2, x0, 0);
476 #else
477  x1 = __PKHBT(x0, x2, 0);
478 #endif
479 
480  /* Read last state variables */
481  x0 = *__SIMD32(px);
482 
483  /* Perform the multiply-accumulates */
484  acc1 = __SMLALDX(x1, c0, acc1);
485 
486  /* pack state variables */
487 #ifndef ARM_MATH_BIG_ENDIAN
488  x1 = __PKHBT(x0, x2, 0);
489 #else
490  x1 = __PKHBT(x2, x0, 0);
491 #endif
492 
493  /* Perform the multiply-accumulates */
494  acc3 = __SMLALDX(x1, c0, acc3);
495  }
496 
497  /* The results in the 4 accumulators are in 2.30 format. Convert to 1.15 with saturation.
498  ** Then store the 4 outputs in the destination buffer. */
499 
500 #ifndef ARM_MATH_BIG_ENDIAN
501 
502  *__SIMD32(pDst)++ =
503  __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
504 
505  *__SIMD32(pDst)++ =
506  __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
507 
508 #else
509 
510  *__SIMD32(pDst)++ =
511  __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
512 
513  *__SIMD32(pDst)++ =
514  __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
515 
516 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
517 
518  /* Advance the state pointer by 4 to process the next group of 4 samples */
519  pState = pState + 4;
520 
521  /* Decrement the loop counter */
522  blkCnt--;
523  }
524 
525  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
526  ** No loop unrolling is used. */
527  blkCnt = blockSize % 0x4u;
528  while(blkCnt > 0u)
529  {
530  /* Copy two samples into state buffer */
531  *pStateCurnt++ = *pSrc++;
532 
533  /* Set the accumulator to zero */
534  acc0 = 0;
535 
536  /* Use SIMD to hold states and coefficients */
537  px = pState;
538  pb = pCoeffs;
539 
540  tapCnt = numTaps >> 1u;
541 
542  do
543  {
544  acc0 += (q31_t) * px++ * *pb++;
545  acc0 += (q31_t) * px++ * *pb++;
546  tapCnt--;
547  }
548  while(tapCnt > 0u);
549 
550  /* The result is in 2.30 format. Convert to 1.15 with saturation.
551  ** Then store the output in the destination buffer. */
552  *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
553 
554  /* Advance state pointer by 1 for the next sample */
555  pState = pState + 1u;
556 
557  /* Decrement the loop counter */
558  blkCnt--;
559  }
560 
561  /* Processing is complete.
562  ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
563  ** This prepares the state buffer for the next function call. */
564 
565  /* Points to the start of the state buffer */
566  pStateCurnt = S->pState;
567 
568  /* Calculation of count for copying integer writes */
569  tapCnt = (numTaps - 1u) >> 2;
570 
571  while(tapCnt > 0u)
572  {
573  *pStateCurnt++ = *pState++;
574  *pStateCurnt++ = *pState++;
575  *pStateCurnt++ = *pState++;
576  *pStateCurnt++ = *pState++;
577 
578  tapCnt--;
579 
580  }
581 
582  /* Calculation of count for remaining q15_t data */
583  tapCnt = (numTaps - 1u) % 0x4u;
584 
585  /* copy remaining data */
586  while(tapCnt > 0u)
587  {
588  *pStateCurnt++ = *pState++;
589 
590  /* Decrement the loop counter */
591  tapCnt--;
592  }
593 }
594 
595 
596 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
597 
598 #else /* ARM_MATH_CM0_FAMILY */
599 
600 
601 /* Run the below code for Cortex-M0 */
602 
603 void arm_fir_q15(
604  const arm_fir_instance_q15 * S,
605  q15_t * pSrc,
606  q15_t * pDst,
607  uint32_t blockSize)
608 {
609  q15_t *pState = S->pState; /* State pointer */
610  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
611  q15_t *pStateCurnt; /* Points to the current sample of the state */
612 
613 
614 
615  q15_t *px; /* Temporary pointer for state buffer */
616  q15_t *pb; /* Temporary pointer for coefficient buffer */
617  q63_t acc; /* Accumulator */
618  uint32_t numTaps = S->numTaps; /* Number of nTaps in the filter */
619  uint32_t tapCnt, blkCnt; /* Loop counters */
620 
621  /* S->pState buffer contains previous frame (numTaps - 1) samples */
622  /* pStateCurnt points to the location where the new input data should be written */
623  pStateCurnt = &(S->pState[(numTaps - 1u)]);
624 
625  /* Initialize blkCnt with blockSize */
626  blkCnt = blockSize;
627 
628  while(blkCnt > 0u)
629  {
630  /* Copy one sample at a time into state buffer */
631  *pStateCurnt++ = *pSrc++;
632 
633  /* Set the accumulator to zero */
634  acc = 0;
635 
636  /* Initialize state pointer */
637  px = pState;
638 
639  /* Initialize Coefficient pointer */
640  pb = pCoeffs;
641 
642  tapCnt = numTaps;
643 
644  /* Perform the multiply-accumulates */
645  do
646  {
647  /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
648  acc += (q31_t) * px++ * *pb++;
649  tapCnt--;
650  } while(tapCnt > 0u);
651 
652  /* The result is in 2.30 format. Convert to 1.15
653  ** Then store the output in the destination buffer. */
654  *pDst++ = (q15_t) __SSAT((acc >> 15u), 16);
655 
656  /* Advance state pointer by 1 for the next sample */
657  pState = pState + 1;
658 
659  /* Decrement the samples loop counter */
660  blkCnt--;
661  }
662 
663  /* Processing is complete.
664  ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.
665  ** This prepares the state buffer for the next function call. */
666 
667  /* Points to the start of the state buffer */
668  pStateCurnt = S->pState;
669 
670  /* Copy numTaps number of values */
671  tapCnt = (numTaps - 1u);
672 
673  /* copy data */
674  while(tapCnt > 0u)
675  {
676  *pStateCurnt++ = *pState++;
677 
678  /* Decrement the loop counter */
679  tapCnt--;
680  }
681 
682 }
683 
684 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
685 
686 
687 
688 
Instance structure for the Q15 FIR filter.
Definition: arm_math.h:1039
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
void arm_fir_q15(const arm_fir_instance_q15 *S, q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Processing function for the Q15 FIR filter.
Definition: arm_fir_q15.c:85
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
#define _SIMD32_OFFSET(addr)
Definition: arm_math.h:447
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397