STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_biquad_cascade_df2T_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_biquad_cascade_df2T_f32.c
9 *
10 * Description: Processing function for the floating-point transposed
11 * direct form II Biquad cascade filter.
12 *
13 * Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 * - Neither the name of ARM LIMITED nor the names of its contributors
25 * may be used to endorse or promote products derived from this
26 * software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 * -------------------------------------------------------------------- */
41 
42 #include "arm_math.h"
43 
/**
 * Processing function for the floating-point transposed direct form II
 * Biquad cascade filter.
 *
 * NOTE(review): listing lines 155-156 (the function name and the `S`
 * parameter) are absent from this extract. According to the cross-reference
 * at the end of the page the full signature is
 *   void arm_biquad_cascade_df2T_f32(
 *       const arm_biquad_cascade_df2T_instance_f32 *S,
 *       float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
 *
 * Parameters:
 *   S         - filter instance; this function reads S->numStages,
 *               S->pCoeffs and S->pState.
 *   pSrc      - input samples; read only by the first stage.
 *   pDst      - output samples; written by every stage. Stages after the
 *               first read their input from pDst and overwrite it in place.
 *   blockSize - number of samples processed per stage.
 *
 * Per stage the code consumes five consecutive coefficients in the order
 * {b0, b1, b2, a1, a2} and two consecutive state values {d1, d2}, and
 * evaluates for every sample:
 *   y[n] = b0 * x[n] + d1
 *   d1   = b1 * x[n] + a1 * y[n] + d2
 *   d2   = b2 * x[n] + a2 * y[n]
 * The feedback terms a1 and a2 are added, not subtracted. The updated d1/d2
 * are written back to the state array at the end of each stage.
 *
 * Three compile-time variants are provided:
 *   ARM_MATH_CM7        - main loop unrolled 16x, plus a one-sample tail loop.
 *   ARM_MATH_CM0_FAMILY - plain one-sample-per-iteration loop.
 *   otherwise           - main loop unrolled 4x, plus a one-sample tail loop.
 *
 * The stage loop is a do/while over an unsigned counter, so its body runs at
 * least once; a numStages of 0 is not guarded against here.
 */
154 LOW_OPTIMIZATION_ENTER
157 float32_t * pSrc,
158 float32_t * pDst,
159 uint32_t blockSize)
160 {
161 
162  float32_t *pIn = pSrc; /* read cursor: pSrc for stage 1, pDst afterwards */
163  float32_t *pOut = pDst; /* write cursor into the destination buffer */
164  float32_t *pState = S->pState; /* cursor into the state array (2 values per stage) */
165  float32_t *pCoeffs = S->pCoeffs; /* cursor into the coefficient array (5 values per stage) */
166  float32_t acc1; /* output sample y[n] (first of the batch in unrolled loops) */
167  float32_t b0, b1, b2, a1, a2; /* coefficients of the current stage */
168  float32_t Xn1; /* input sample x[n] (first of the batch in unrolled loops) */
169  float32_t d1, d2; /* state variables of the current stage */
170  uint32_t sample, stage = S->numStages; /* loop counters: samples left, stages left */
171 
172 #if defined(ARM_MATH_CM7)
173 
174  float32_t Xn2, Xn3, Xn4, Xn5, Xn6, Xn7, Xn8; /* inputs 2..8 of the 16-sample batch */
175  float32_t Xn9, Xn10, Xn11, Xn12, Xn13, Xn14, Xn15, Xn16; /* inputs 9..16 of the batch */
176  float32_t acc2, acc3, acc4, acc5, acc6, acc7; /* outputs 2..7 of the 16-sample batch */
177  float32_t acc8, acc9, acc10, acc11, acc12, acc13, acc14, acc15, acc16; /* outputs 8..16 */
178 
179  do
180  {
181  /* Read the five coefficients of this stage */
182  b0 = pCoeffs[0];
183  b1 = pCoeffs[1];
184  b2 = pCoeffs[2];
185  a1 = pCoeffs[3];
186  /* Number of full 16-sample batches handled by the unrolled loop. */
187  sample = blockSize >> 4u;
188  a2 = pCoeffs[4];
189 
190  /* Read the two state values of this stage */
191  d1 = pState[0];
192  d2 = pState[1];
193 
194  pCoeffs += 5u;
195 
196 
197  /* First part of the processing with loop unrolling. Compute 16 outputs at a time.
198  ** A second loop below computes the remaining 0 to 15 samples. */
199  while(sample > 0u) {
200 
201  /* y[n] = b0 * x[n] + d1 */
202  /* d1 = b1 * x[n] + a1 * y[n] + d2 */
203  /* d2 = b2 * x[n] + a2 * y[n] */
204 
     /* All 16 inputs of the batch are loaded before the first output is
      * stored; for stages after the first, pIn and pOut both walk pDst.
      * The loads are interleaved with the arithmetic of earlier samples
      * (presumably to hide load latency - confirm against the M7 pipeline). */
205  /* Read the first 2 inputs. 2 cycles */
206  Xn1 = pIn[0 ];
207  Xn2 = pIn[1 ];
208 
209  /* Sample 1. 5 cycles */
210  Xn3 = pIn[2 ];
211  acc1 = b0 * Xn1 + d1;
212 
213  Xn4 = pIn[3 ];
214  d1 = b1 * Xn1 + d2;
215 
216  Xn5 = pIn[4 ];
217  d2 = b2 * Xn1;
218 
219  Xn6 = pIn[5 ];
220  d1 += a1 * acc1;
221 
222  Xn7 = pIn[6 ];
223  d2 += a2 * acc1;
224 
225  /* Sample 2. 5 cycles */
226  Xn8 = pIn[7 ];
227  acc2 = b0 * Xn2 + d1;
228 
229  Xn9 = pIn[8 ];
230  d1 = b1 * Xn2 + d2;
231 
232  Xn10 = pIn[9 ];
233  d2 = b2 * Xn2;
234 
235  Xn11 = pIn[10];
236  d1 += a1 * acc2;
237 
238  Xn12 = pIn[11];
239  d2 += a2 * acc2;
240 
241  /* Sample 3. 5 cycles */
242  Xn13 = pIn[12];
243  acc3 = b0 * Xn3 + d1;
244 
245  Xn14 = pIn[13];
246  d1 = b1 * Xn3 + d2;
247 
248  Xn15 = pIn[14];
249  d2 = b2 * Xn3;
250 
251  Xn16 = pIn[15];
252  d1 += a1 * acc3;
253 
254  pIn += 16;
255  d2 += a2 * acc3;
256 
257  /* Sample 4. 5 cycles */
258  acc4 = b0 * Xn4 + d1;
259  d1 = b1 * Xn4 + d2;
260  d2 = b2 * Xn4;
261  d1 += a1 * acc4;
262  d2 += a2 * acc4;
263 
264  /* Sample 5. 5 cycles */
265  acc5 = b0 * Xn5 + d1;
266  d1 = b1 * Xn5 + d2;
267  d2 = b2 * Xn5;
268  d1 += a1 * acc5;
269  d2 += a2 * acc5;
270 
271  /* Sample 6. 5 cycles */
272  acc6 = b0 * Xn6 + d1;
273  d1 = b1 * Xn6 + d2;
274  d2 = b2 * Xn6;
275  d1 += a1 * acc6;
276  d2 += a2 * acc6;
277 
278  /* Sample 7. 5 cycles */
279  acc7 = b0 * Xn7 + d1;
280  d1 = b1 * Xn7 + d2;
281  d2 = b2 * Xn7;
282  d1 += a1 * acc7;
283  d2 += a2 * acc7;
284 
285  /* Sample 8. 5 cycles */
286  acc8 = b0 * Xn8 + d1;
287  d1 = b1 * Xn8 + d2;
288  d2 = b2 * Xn8;
289  d1 += a1 * acc8;
290  d2 += a2 * acc8;
291 
292  /* Sample 9. 5 cycles */
293  acc9 = b0 * Xn9 + d1;
294  d1 = b1 * Xn9 + d2;
295  d2 = b2 * Xn9;
296  d1 += a1 * acc9;
297  d2 += a2 * acc9;
298 
299  /* Sample 10. 5 cycles */
300  acc10 = b0 * Xn10 + d1;
301  d1 = b1 * Xn10 + d2;
302  d2 = b2 * Xn10;
303  d1 += a1 * acc10;
304  d2 += a2 * acc10;
305 
306  /* Sample 11. 5 cycles */
307  acc11 = b0 * Xn11 + d1;
308  d1 = b1 * Xn11 + d2;
309  d2 = b2 * Xn11;
310  d1 += a1 * acc11;
311  d2 += a2 * acc11;
312 
313  /* Sample 12. 5 cycles */
314  acc12 = b0 * Xn12 + d1;
315  d1 = b1 * Xn12 + d2;
316  d2 = b2 * Xn12;
317  d1 += a1 * acc12;
318  d2 += a2 * acc12;
319 
     /* From here on the stores of the finished outputs are interleaved
      * with the arithmetic of samples 13..16. */
320  /* Sample 13. 5 cycles */
321  acc13 = b0 * Xn13 + d1;
322  d1 = b1 * Xn13 + d2;
323  d2 = b2 * Xn13;
324 
325  pOut[0 ] = acc1 ;
326  d1 += a1 * acc13;
327 
328  pOut[1 ] = acc2 ;
329  d2 += a2 * acc13;
330 
331  /* Sample 14. 5 cycles */
332  pOut[2 ] = acc3 ;
333  acc14 = b0 * Xn14 + d1;
334 
335  pOut[3 ] = acc4 ;
336  d1 = b1 * Xn14 + d2;
337 
338  pOut[4 ] = acc5 ;
339  d2 = b2 * Xn14;
340 
341  pOut[5 ] = acc6 ;
342  d1 += a1 * acc14;
343 
344  pOut[6 ] = acc7 ;
345  d2 += a2 * acc14;
346 
347  /* Sample 15. 5 cycles */
348  pOut[7 ] = acc8 ;
349  pOut[8 ] = acc9 ;
350  acc15 = b0 * Xn15 + d1;
351 
352  pOut[9 ] = acc10;
353  d1 = b1 * Xn15 + d2;
354 
355  pOut[10] = acc11;
356  d2 = b2 * Xn15;
357 
358  pOut[11] = acc12;
359  d1 += a1 * acc15;
360 
361  pOut[12] = acc13;
362  d2 += a2 * acc15;
363 
364  /* Sample 16. 5 cycles */
365  pOut[13] = acc14;
366  acc16 = b0 * Xn16 + d1;
367 
368  pOut[14] = acc15;
369  d1 = b1 * Xn16 + d2;
370 
371  pOut[15] = acc16;
372  d2 = b2 * Xn16;
373 
374  sample--;
375  d1 += a1 * acc16;
376 
377  pOut += 16;
378  d2 += a2 * acc16;
379  }
380 
     /* Tail: the blockSize % 16 samples left over, one per iteration. */
381  sample = blockSize & 0xFu;
382  while(sample > 0u) {
383  Xn1 = *pIn;
384  acc1 = b0 * Xn1 + d1;
385 
386  pIn++;
387  d1 = b1 * Xn1 + d2;
388 
389  *pOut = acc1;
390  d2 = b2 * Xn1;
391 
392  pOut++;
393  d1 += a1 * acc1;
394 
395  sample--;
396  d2 += a2 * acc1;
397  }
398 
399  /* Store the updated state variables back into the state array */
400  pState[0] = d1;
401  /* The output of this stage (in pDst) is the input of the next stage */
402  pIn = pDst;
403 
404  pState[1] = d2;
405  /* decrement the stage counter */
406  stage--;
407 
408  pState += 2u;
409 
410  /* Reset the output working pointer */
411  pOut = pDst;
412 
413  } while(stage > 0u);
414 
415 #elif defined(ARM_MATH_CM0_FAMILY)
416 
417  /* Run the below code for Cortex-M0 */
418 
419  do
420  {
421  /* Read the five coefficients of this stage */
422  b0 = *pCoeffs++;
423  b1 = *pCoeffs++;
424  b2 = *pCoeffs++;
425  a1 = *pCoeffs++;
426  a2 = *pCoeffs++;
427 
428  /* Read the two state values of this stage */
429  d1 = pState[0];
430  d2 = pState[1];
431 
432 
433  sample = blockSize;
434 
435  while(sample > 0u)
436  {
437  /* Read the input */
438  Xn1 = *pIn++;
439 
440  /* y[n] = b0 * x[n] + d1 */
441  acc1 = (b0 * Xn1) + d1;
442 
443  /* Store the output sample in the destination buffer. */
444  *pOut++ = acc1;
445 
446  /* The state is updated after every output sample. */
447  /* d1 = b1 * x[n] + a1 * y[n] + d2 */
448  d1 = ((b1 * Xn1) + (a1 * acc1)) + d2;
449 
450  /* d2 = b2 * x[n] + a2 * y[n] */
451  d2 = (b2 * Xn1) + (a2 * acc1);
452 
453  /* decrement the loop counter */
454  sample--;
455  }
456 
457  /* Store the updated state variables back into the state array */
458  *pState++ = d1;
459  *pState++ = d2;
460 
461  /* The output of this stage (in pDst) is the input of the next stage */
462  pIn = pDst;
463 
464  /* Reset the output working pointer */
465  pOut = pDst;
466 
467  /* decrement the stage counter */
468  stage--;
469 
470  } while(stage > 0u);
471 
472 #else
473 
474  float32_t Xn2, Xn3, Xn4; /* inputs 2..4 of the 4-sample batch */
475  float32_t acc2, acc3, acc4; /* outputs 2..4 of the 4-sample batch */
476 
477 
     /* p0..p4 hold the individual products b0*x, b1*x, b2*x, a1*y, a2*y;
      * A1 holds the partial sum b1*x + a1*y before d2 is added. */
478  float32_t p0, p1, p2, p3, p4, A1;
479 
480  /* Run the below code for Cortex-M4 and Cortex-M3 */
481  do
482  {
483  /* Read the five coefficients of this stage */
484  b0 = *pCoeffs++;
485  b1 = *pCoeffs++;
486  b2 = *pCoeffs++;
487  a1 = *pCoeffs++;
488  a2 = *pCoeffs++;
489 
490 
491  /* Read the two state values of this stage */
492  d1 = pState[0];
493  d2 = pState[1];
494 
495  /* Number of full 4-sample batches handled by the unrolled loop. */
496  sample = blockSize >> 2u;
497 
498  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
499  ** A second loop below computes the remaining 0 to 3 samples. */
500  while(sample > 0u) {
501 
502  /* y[n] = b0 * x[n] + d1 */
503  /* d1 = b1 * x[n] + a1 * y[n] + d2 */
504  /* d2 = b2 * x[n] + a2 * y[n] */
505 
506  /* Read the four inputs (all loaded before any output is stored) */
507  Xn1 = pIn[0];
508  Xn2 = pIn[1];
509  Xn3 = pIn[2];
510  Xn4 = pIn[3];
511  pIn += 4;
512 
     /* Sample 1; the b0 product of sample 2 is started early in p0. */
513  p0 = b0 * Xn1;
514  p1 = b1 * Xn1;
515  acc1 = p0 + d1;
516  p0 = b0 * Xn2;
517  p3 = a1 * acc1;
518  p2 = b2 * Xn1;
519  A1 = p1 + p3;
520  p4 = a2 * acc1;
521  d1 = A1 + d2;
522  d2 = p2 + p4;
523 
     /* Sample 2; the b0 product of sample 3 is started early in p0. */
524  p1 = b1 * Xn2;
525  acc2 = p0 + d1;
526  p0 = b0 * Xn3;
527  p3 = a1 * acc2;
528  p2 = b2 * Xn2;
529  A1 = p1 + p3;
530  p4 = a2 * acc2;
531  d1 = A1 + d2;
532  d2 = p2 + p4;
533 
     /* Sample 3; the b0 product of sample 4 is started early in p0. */
534  p1 = b1 * Xn3;
535  acc3 = p0 + d1;
536  p0 = b0 * Xn4;
537  p3 = a1 * acc3;
538  p2 = b2 * Xn3;
539  A1 = p1 + p3;
540  p4 = a2 * acc3;
541  d1 = A1 + d2;
542  d2 = p2 + p4;
543 
     /* Sample 4 */
544  acc4 = p0 + d1;
545  p1 = b1 * Xn4;
546  p3 = a1 * acc4;
547  p2 = b2 * Xn4;
548  A1 = p1 + p3;
549  p4 = a2 * acc4;
550  d1 = A1 + d2;
551  d2 = p2 + p4;
552 
     /* Store the four outputs of this batch */
553  pOut[0] = acc1;
554  pOut[1] = acc2;
555  pOut[2] = acc3;
556  pOut[3] = acc4;
557  pOut += 4;
558 
559  sample--;
560  }
561 
     /* Tail: the blockSize % 4 samples left over, one per iteration. */
562  sample = blockSize & 0x3u;
563  while(sample > 0u) {
564  Xn1 = *pIn++;
565 
566  p0 = b0 * Xn1;
567  p1 = b1 * Xn1;
568  acc1 = p0 + d1;
569  p3 = a1 * acc1;
570  p2 = b2 * Xn1;
571  A1 = p1 + p3;
572  p4 = a2 * acc1;
573  d1 = A1 + d2;
574  d2 = p2 + p4;
575 
576  *pOut++ = acc1;
577 
578  sample--;
579  }
580 
581  /* Store the updated state variables back into the state array */
582  *pState++ = d1;
583  *pState++ = d2;
584 
585  /* The output of this stage (in pDst) is the input of the next stage */
586  pIn = pDst;
587 
588  /* Reset the output working pointer */
589  pOut = pDst;
590 
591  /* decrement the stage counter */
592  stage--;
593 
594  } while(stage > 0u);
595 
596 #endif
597 
598 }
599 LOW_OPTIMIZATION_EXIT
600 
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
LOW_OPTIMIZATION_ENTER void arm_biquad_cascade_df2T_f32(const arm_biquad_cascade_df2T_instance_f32 *S, float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
Processing function for the floating-point transposed direct form II Biquad cascade filter...
arm_biquad_cascade_df2T_instance_f32: Instance structure for the floating-point transposed direct form II Biquad cascade filter.
Definition: arm_math.h:3608