STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_decimate_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_f32.c
9 *
10 * Description: FIR decimation for floating-point sequences.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
143  float32_t * pSrc,
144  float32_t * pDst,
145  uint32_t blockSize)
146 {
147  float32_t *pState = S->pState; /* State pointer */
148  float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
149  float32_t *pStateCurnt; /* Points to the current sample of the state */
150  float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
151  float32_t sum0; /* Accumulator */
152  float32_t x0, c0; /* Temporary variables to hold state and coefficient values */
153  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
154  uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */
155 
156 #ifndef ARM_MATH_CM0_FAMILY
157 
158  uint32_t blkCntN4;
159  float32_t *px0, *px1, *px2, *px3;
160  float32_t acc0, acc1, acc2, acc3;
161  float32_t x1, x2, x3;
162 
163  /* Run the below code for Cortex-M4 and Cortex-M3 */
164 
165  /* S->pState buffer contains previous frame (numTaps - 1) samples */
166  /* pStateCurnt points to the location where the new input data should be written */
167  pStateCurnt = S->pState + (numTaps - 1u);
168 
169  /* Split the output samples into groups of 4 (blkCnt) plus the remaining 0..3 samples (blkCntN4) */
170  blkCnt = outBlockSize / 4;
171  blkCntN4 = outBlockSize - (4 * blkCnt);
172 
173  while(blkCnt > 0u)
174  {
175  /* Copy 4 * decimation factor number of new input samples into the state buffer */
176  i = 4 * S->M;
177 
178  do
179  {
180  *pStateCurnt++ = *pSrc++;
181 
182  } while(--i);
183 
184  /* Set accumulators to zero */
185  acc0 = 0.0f;
186  acc1 = 0.0f;
187  acc2 = 0.0f;
188  acc3 = 0.0f;
189 
190  /* Initialize state pointer for all the samples */
191  px0 = pState;
192  px1 = pState + S->M;
193  px2 = pState + 2 * S->M;
194  px3 = pState + 3 * S->M;
195 
196  /* Initialize coeff pointer */
197  pb = pCoeffs;
198 
199  /* Loop unrolling. Process 4 taps at a time. */
200  tapCnt = numTaps >> 2;
201 
202  /* Loop over the number of taps. Unroll by a factor of 4.
203  ** Repeat until 4 * (numTaps / 4) coefficients have been processed; any remaining taps are handled below. */
204 
205  while(tapCnt > 0u)
206  {
207  /* Read the b[numTaps-1] coefficient */
208  c0 = *(pb++);
209 
210  /* Read x[n-numTaps-1] sample for acc0 */
211  x0 = *(px0++);
212  /* Read x[n-numTaps-1] sample for acc1 */
213  x1 = *(px1++);
214  /* Read x[n-numTaps-1] sample for acc2 */
215  x2 = *(px2++);
216  /* Read x[n-numTaps-1] sample for acc3 */
217  x3 = *(px3++);
218 
219  /* Perform the multiply-accumulate */
220  acc0 += x0 * c0;
221  acc1 += x1 * c0;
222  acc2 += x2 * c0;
223  acc3 += x3 * c0;
224 
225  /* Read the b[numTaps-2] coefficient */
226  c0 = *(pb++);
227 
228  /* Read x[n-numTaps-2] sample for acc0, acc1, acc2, acc3 */
229  x0 = *(px0++);
230  x1 = *(px1++);
231  x2 = *(px2++);
232  x3 = *(px3++);
233 
234  /* Perform the multiply-accumulate */
235  acc0 += x0 * c0;
236  acc1 += x1 * c0;
237  acc2 += x2 * c0;
238  acc3 += x3 * c0;
239 
240  /* Read the b[numTaps-3] coefficient */
241  c0 = *(pb++);
242 
243  /* Read x[n-numTaps-3] sample acc0, acc1, acc2, acc3 */
244  x0 = *(px0++);
245  x1 = *(px1++);
246  x2 = *(px2++);
247  x3 = *(px3++);
248 
249  /* Perform the multiply-accumulate */
250  acc0 += x0 * c0;
251  acc1 += x1 * c0;
252  acc2 += x2 * c0;
253  acc3 += x3 * c0;
254 
255  /* Read the b[numTaps-4] coefficient */
256  c0 = *(pb++);
257 
258  /* Read x[n-numTaps-4] sample acc0, acc1, acc2, acc3 */
259  x0 = *(px0++);
260  x1 = *(px1++);
261  x2 = *(px2++);
262  x3 = *(px3++);
263 
264  /* Perform the multiply-accumulate */
265  acc0 += x0 * c0;
266  acc1 += x1 * c0;
267  acc2 += x2 * c0;
268  acc3 += x3 * c0;
269 
270  /* Decrement the loop counter */
271  tapCnt--;
272  }
273 
274  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
275  tapCnt = numTaps % 0x4u;
276 
277  while(tapCnt > 0u)
278  {
279  /* Read coefficients */
280  c0 = *(pb++);
281 
282  /* Fetch state variables for acc0, acc1, acc2, acc3 */
283  x0 = *(px0++);
284  x1 = *(px1++);
285  x2 = *(px2++);
286  x3 = *(px3++);
287 
288  /* Perform the multiply-accumulate */
289  acc0 += x0 * c0;
290  acc1 += x1 * c0;
291  acc2 += x2 * c0;
292  acc3 += x3 * c0;
293 
294  /* Decrement the loop counter */
295  tapCnt--;
296  }
297 
298  /* Advance the state pointer by 4 times the decimation factor
299  * to process the next group of 4 * decimation factor samples */
300  pState = pState + 4 * S->M;
301 
302  /* The result is in the accumulator, store in the destination buffer. */
303  *pDst++ = acc0;
304  *pDst++ = acc1;
305  *pDst++ = acc2;
306  *pDst++ = acc3;
307 
308  /* Decrement the loop counter */
309  blkCnt--;
310  }
311 
312  while(blkCntN4 > 0u)
313  {
314  /* Copy decimation factor number of new input samples into the state buffer */
315  i = S->M;
316 
317  do
318  {
319  *pStateCurnt++ = *pSrc++;
320 
321  } while(--i);
322 
323  /* Set accumulator to zero */
324  sum0 = 0.0f;
325 
326  /* Initialize state pointer */
327  px = pState;
328 
329  /* Initialize coeff pointer */
330  pb = pCoeffs;
331 
332  /* Loop unrolling. Process 4 taps at a time. */
333  tapCnt = numTaps >> 2;
334 
335  /* Loop over the number of taps. Unroll by a factor of 4.
336  ** Repeat until 4 * (numTaps / 4) coefficients have been processed; any remaining taps are handled below. */
337  while(tapCnt > 0u)
338  {
339  /* Read the b[numTaps-1] coefficient */
340  c0 = *(pb++);
341 
342  /* Read x[n-numTaps-1] sample */
343  x0 = *(px++);
344 
345  /* Perform the multiply-accumulate */
346  sum0 += x0 * c0;
347 
348  /* Read the b[numTaps-2] coefficient */
349  c0 = *(pb++);
350 
351  /* Read x[n-numTaps-2] sample */
352  x0 = *(px++);
353 
354  /* Perform the multiply-accumulate */
355  sum0 += x0 * c0;
356 
357  /* Read the b[numTaps-3] coefficient */
358  c0 = *(pb++);
359 
360  /* Read x[n-numTaps-3] sample */
361  x0 = *(px++);
362 
363  /* Perform the multiply-accumulate */
364  sum0 += x0 * c0;
365 
366  /* Read the b[numTaps-4] coefficient */
367  c0 = *(pb++);
368 
369  /* Read x[n-numTaps-4] sample */
370  x0 = *(px++);
371 
372  /* Perform the multiply-accumulate */
373  sum0 += x0 * c0;
374 
375  /* Decrement the loop counter */
376  tapCnt--;
377  }
378 
379  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
380  tapCnt = numTaps % 0x4u;
381 
382  while(tapCnt > 0u)
383  {
384  /* Read coefficients */
385  c0 = *(pb++);
386 
387  /* Fetch 1 state variable */
388  x0 = *(px++);
389 
390  /* Perform the multiply-accumulate */
391  sum0 += x0 * c0;
392 
393  /* Decrement the loop counter */
394  tapCnt--;
395  }
396 
397  /* Advance the state pointer by the decimation factor
398  * to process the next group of decimation factor number samples */
399  pState = pState + S->M;
400 
401  /* The result is in the accumulator, store in the destination buffer. */
402  *pDst++ = sum0;
403 
404  /* Decrement the loop counter */
405  blkCntN4--;
406  }
407 
408  /* Processing is complete.
409 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
410  ** This prepares the state buffer for the next function call. */
411 
412  /* Points to the start of the state buffer */
413  pStateCurnt = S->pState;
414 
415  i = (numTaps - 1u) >> 2;
416 
417  /* copy data */
418  while(i > 0u)
419  {
420  *pStateCurnt++ = *pState++;
421  *pStateCurnt++ = *pState++;
422  *pStateCurnt++ = *pState++;
423  *pStateCurnt++ = *pState++;
424 
425  /* Decrement the loop counter */
426  i--;
427  }
428 
429  i = (numTaps - 1u) % 0x04u;
430 
431  /* copy data */
432  while(i > 0u)
433  {
434  *pStateCurnt++ = *pState++;
435 
436  /* Decrement the loop counter */
437  i--;
438  }
439 
440 #else
441 
442 /* Run the below code for Cortex-M0 */
443 
444  /* S->pState buffer contains previous frame (numTaps - 1) samples */
445  /* pStateCurnt points to the location where the new input data should be written */
446  pStateCurnt = S->pState + (numTaps - 1u);
447 
448  /* Total number of output samples to be computed */
449  blkCnt = outBlockSize;
450 
451  while(blkCnt > 0u)
452  {
453  /* Copy decimation factor number of new input samples into the state buffer */
454  i = S->M;
455 
456  do
457  {
458  *pStateCurnt++ = *pSrc++;
459 
460  } while(--i);
461 
462  /* Set accumulator to zero */
463  sum0 = 0.0f;
464 
465  /* Initialize state pointer */
466  px = pState;
467 
468  /* Initialize coeff pointer */
469  pb = pCoeffs;
470 
471  tapCnt = numTaps;
472 
473  while(tapCnt > 0u)
474  {
475  /* Read coefficients */
476  c0 = *pb++;
477 
478  /* Fetch 1 state variable */
479  x0 = *px++;
480 
481  /* Perform the multiply-accumulate */
482  sum0 += x0 * c0;
483 
484  /* Decrement the loop counter */
485  tapCnt--;
486  }
487 
488  /* Advance the state pointer by the decimation factor
489  * to process the next group of decimation factor number samples */
490  pState = pState + S->M;
491 
492  /* The result is in the accumulator, store in the destination buffer. */
493  *pDst++ = sum0;
494 
495  /* Decrement the loop counter */
496  blkCnt--;
497  }
498 
499  /* Processing is complete.
500  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
501  ** This prepares the state buffer for the next function call. */
502 
503  /* Points to the start of the state buffer */
504  pStateCurnt = S->pState;
505 
506  /* Copy (numTaps - 1) values */
507  i = (numTaps - 1u);
508 
509  /* copy data */
510  while(i > 0u)
511  {
512  *pStateCurnt++ = *pState++;
513 
514  /* Decrement the loop counter */
515  i--;
516  }
517 
518 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
519 
520 }
521 
void arm_fir_decimate_f32(const arm_fir_decimate_instance_f32 *S, float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
Processing function for the floating-point FIR decimator.
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
Instance structure for the floating-point FIR decimator.
Definition: arm_math.h:3291