STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_biquad_cascade_stereo_df2T_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_biquad_cascade_stereo_df2T_f32.c
9 *
10 * Description: Processing function for the floating-point transposed
11 * direct form II Biquad cascade filter, 2 channels (interleaved stereo).
12 *
13 * Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * - Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * - Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
23 * distribution.
24 * - Neither the name of ARM LIMITED nor the names of its contributors
25 * may be used to endorse or promote products derived from this
26 * software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
31 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
32 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
33 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
34 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
35 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
38 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 * POSSIBILITY OF SUCH DAMAGE.
40 * -------------------------------------------------------------------- */
41 
42 #include "arm_math.h"
43 
/**
 * Processing function for the floating-point transposed direct form II
 * Biquad cascade filter, stereo (2 channel) variant.
 *
 * NOTE(review): this listing omits source lines 155-156 (the function name
 * and its first parameter). Per the reference text at the end of this page
 * the full signature is:
 *   void arm_biquad_cascade_stereo_df2T_f32(
 *       const arm_biquad_cascade_stereo_df2T_instance_f32 *S,
 *       float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
 *
 * S         filter instance. This function reads S->numStages, reads
 *           5 coefficients per stage from S->pCoeffs in the order
 *           b0, b1, b2, a1, a2, and reads and writes back 4 state values
 *           per stage in S->pState in the order d1a, d2a, d1b, d2b.
 * pSrc      input buffer, interleaved as channel a, channel b, ...;
 *           2 * blockSize values are read by the first stage.
 * pDst      output buffer, same interleaved layout; 2 * blockSize values
 *           are written. Stages after the first read their input from
 *           pDst and write their output back to pDst.
 * blockSize number of frames (samples per channel) to process.
 *
 * Per channel and per stage the recurrence is:
 *   y[n] = b0 * x[n] + d1
 *   d1   = b1 * x[n] + a1 * y[n] + d2
 *   d2   = b2 * x[n] + a2 * y[n]
 * Note that the a1/a2 terms are added, not subtracted.
 *
 * One of three implementations is selected at compile time:
 * ARM_MATH_CM7 (8 frames per unrolled pass), ARM_MATH_CM0_FAMILY
 * (no unrolling), otherwise 4 frames per unrolled pass.
 *
 * NOTE(review): every variant uses a do/while over the stages, so one stage
 * is always processed and numStages == 0 would wrap the unsigned counter;
 * presumably callers guarantee numStages >= 1 - confirm.
 */
154 LOW_OPTIMIZATION_ENTER
157 float32_t * pSrc,
158 float32_t * pDst,
159 uint32_t blockSize)
160 {
161 
162  float32_t *pIn = pSrc; /* read pointer: pSrc for the first stage, pDst afterwards */
163  float32_t *pOut = pDst; /* write pointer, reset to pDst after each stage */
164  float32_t *pState = S->pState; /* state pointer, 4 values per stage */
165  float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer, 5 values per stage */
166  float32_t acc1a, acc1b; /* output y[n] of channel a / channel b */
167  float32_t b0, b1, b2, a1, a2; /* coefficients of the current stage */
168  float32_t Xn1a, Xn1b; /* input x[n] of channel a / channel b */
169  float32_t d1a, d2a, d1b, d2b; /* state variables d1, d2 of channel a / channel b */
170  uint32_t sample, stage = S->numStages; /* loop counters: frames left, stages left */
171 
172 #if defined(ARM_MATH_CM7)
173 
174  float32_t Xn2a, Xn3a, Xn4a, Xn5a, Xn6a, Xn7a, Xn8a; /* channel a inputs of frames 2..8 of an unrolled pass */
175  float32_t Xn2b, Xn3b, Xn4b, Xn5b, Xn6b, Xn7b, Xn8b; /* channel b inputs of frames 2..8 of an unrolled pass */
176  float32_t acc2a, acc3a, acc4a, acc5a, acc6a, acc7a, acc8a; /* channel a outputs of frames 2..8 */
177  float32_t acc2b, acc3b, acc4b, acc5b, acc6b, acc7b, acc8b; /* channel b outputs of frames 2..8 */
178 
179  do
180  {
181  /* Read the five coefficients of this stage */
182  b0 = pCoeffs[0];
183  b1 = pCoeffs[1];
184  b2 = pCoeffs[2];
185  a1 = pCoeffs[3];
186  /* Loop unrolling: each pass of the unrolled loop handles 8 stereo frames (16 interleaved values). */
187  sample = blockSize >> 3u;
188  a2 = pCoeffs[4];
189 
190  /* Read the state values of this stage: d1, d2 for channel a, then for channel b */
191  d1a = pState[0];
192  d2a = pState[1];
193  d1b = pState[2];
194  d2b = pState[3];
195 
196  pCoeffs += 5u;
197 
198  /* First part of the processing with loop unrolling. Compute 8 frames at a time.
199  ** A second loop below computes the remaining 0 to 7 frames. */
200  while(sample > 0u) {
201 
202  /* y[n] = b0 * x[n] + d1 */
203  /* d1 = b1 * x[n] + a1 * y[n] + d2 */
204  /* d2 = b2 * x[n] + a2 * y[n] */
205 
206  /* Read the first 2 inputs (frame 1, channels a and b). 2 cycles */
207  Xn1a = pIn[0 ];
208  Xn1b = pIn[1 ];
209 
210  /* Sample 1 (frame 1, channel a). 5 cycles. Loads of later inputs are interleaved with the arithmetic. */
211  Xn2a = pIn[2 ];
212  acc1a = b0 * Xn1a + d1a;
213 
214  Xn2b = pIn[3 ];
215  d1a = b1 * Xn1a + d2a;
216 
217  Xn3a = pIn[4 ];
218  d2a = b2 * Xn1a;
219 
220  Xn3b = pIn[5 ];
221  d1a += a1 * acc1a;
222 
223  Xn4a = pIn[6 ];
224  d2a += a2 * acc1a;
225 
226  /* Sample 2 (frame 1, channel b). 5 cycles */
227  Xn4b = pIn[7 ];
228  acc1b = b0 * Xn1b + d1b;
229 
230  Xn5a = pIn[8 ];
231  d1b = b1 * Xn1b + d2b;
232 
233  Xn5b = pIn[9 ];
234  d2b = b2 * Xn1b;
235 
236  Xn6a = pIn[10];
237  d1b += a1 * acc1b;
238 
239  Xn6b = pIn[11];
240  d2b += a2 * acc1b;
241 
242  /* Sample 3 (frame 2, channel a). 5 cycles */
243  Xn7a = pIn[12];
244  acc2a = b0 * Xn2a + d1a;
245 
246  Xn7b = pIn[13];
247  d1a = b1 * Xn2a + d2a;
248 
249  Xn8a = pIn[14];
250  d2a = b2 * Xn2a;
251 
252  Xn8b = pIn[15];
253  d1a += a1 * acc2a;
254 
255  pIn += 16;
256  d2a += a2 * acc2a;
257 
258  /* Sample 4 (frame 2, channel b). 5 cycles. All 16 inputs of this pass have been read by now. */
259  acc2b = b0 * Xn2b + d1b;
260  d1b = b1 * Xn2b + d2b;
261  d2b = b2 * Xn2b;
262  d1b += a1 * acc2b;
263  d2b += a2 * acc2b;
264 
265  /* Sample 5 (frame 3, channel a). 5 cycles */
266  acc3a = b0 * Xn3a + d1a;
267  d1a = b1 * Xn3a + d2a;
268  d2a = b2 * Xn3a;
269  d1a += a1 * acc3a;
270  d2a += a2 * acc3a;
271 
272  /* Sample 6 (frame 3, channel b). 5 cycles */
273  acc3b = b0 * Xn3b + d1b;
274  d1b = b1 * Xn3b + d2b;
275  d2b = b2 * Xn3b;
276  d1b += a1 * acc3b;
277  d2b += a2 * acc3b;
278 
279  /* Sample 7 (frame 4, channel a). 5 cycles */
280  acc4a = b0 * Xn4a + d1a;
281  d1a = b1 * Xn4a + d2a;
282  d2a = b2 * Xn4a;
283  d1a += a1 * acc4a;
284  d2a += a2 * acc4a;
285 
286  /* Sample 8 (frame 4, channel b). 5 cycles */
287  acc4b = b0 * Xn4b + d1b;
288  d1b = b1 * Xn4b + d2b;
289  d2b = b2 * Xn4b;
290  d1b += a1 * acc4b;
291  d2b += a2 * acc4b;
292 
293  /* Sample 9 (frame 5, channel a). 5 cycles */
294  acc5a = b0 * Xn5a + d1a;
295  d1a = b1 * Xn5a + d2a;
296  d2a = b2 * Xn5a;
297  d1a += a1 * acc5a;
298  d2a += a2 * acc5a;
299 
300  /* Sample 10 (frame 5, channel b). 5 cycles */
301  acc5b = b0 * Xn5b + d1b;
302  d1b = b1 * Xn5b + d2b;
303  d2b = b2 * Xn5b;
304  d1b += a1 * acc5b;
305  d2b += a2 * acc5b;
306 
307  /* Sample 11 (frame 6, channel a). 5 cycles */
308  acc6a = b0 * Xn6a + d1a;
309  d1a = b1 * Xn6a + d2a;
310  d2a = b2 * Xn6a;
311  d1a += a1 * acc6a;
312  d2a += a2 * acc6a;
313 
314  /* Sample 12 (frame 6, channel b). 5 cycles */
315  acc6b = b0 * Xn6b + d1b;
316  d1b = b1 * Xn6b + d2b;
317  d2b = b2 * Xn6b;
318  d1b += a1 * acc6b;
319  d2b += a2 * acc6b;
320 
321  /* Sample 13 (frame 7, channel a). 5 cycles. From here on, stores of finished outputs are interleaved with the arithmetic. */
322  acc7a = b0 * Xn7a + d1a;
323  d1a = b1 * Xn7a + d2a;
324 
325  pOut[0 ] = acc1a ;
326  d2a = b2 * Xn7a;
327 
328  pOut[1 ] = acc1b ;
329  d1a += a1 * acc7a;
330 
331  pOut[2 ] = acc2a ;
332  d2a += a2 * acc7a;
333 
334  /* Sample 14 (frame 7, channel b). 5 cycles */
335  pOut[3 ] = acc2b ;
336  acc7b = b0 * Xn7b + d1b;
337 
338  pOut[4 ] = acc3a ;
339  d1b = b1 * Xn7b + d2b;
340 
341  pOut[5 ] = acc3b ;
342  d2b = b2 * Xn7b;
343 
344  pOut[6 ] = acc4a ;
345  d1b += a1 * acc7b;
346 
347  pOut[7 ] = acc4b ;
348  d2b += a2 * acc7b;
349 
350  /* Sample 15 (frame 8, channel a). 5 cycles */
351  pOut[8 ] = acc5a ;
352  acc8a = b0 * Xn8a + d1a;
353 
354  pOut[9 ] = acc5b;
355  d1a = b1 * Xn8a + d2a;
356 
357  pOut[10] = acc6a;
358  d2a = b2 * Xn8a;
359 
360  pOut[11] = acc6b;
361  d1a += a1 * acc8a;
362 
363  pOut[12] = acc7a;
364  d2a += a2 * acc8a;
365 
366  /* Sample 16 (frame 8, channel b). 5 cycles */
367  pOut[13] = acc7b;
368  acc8b = b0 * Xn8b + d1b;
369 
370  pOut[14] = acc8a;
371  d1b = b1 * Xn8b + d2b;
372 
373  pOut[15] = acc8b;
374  d2b = b2 * Xn8b;
375 
376  sample--;
377  d1b += a1 * acc8b;
378 
379  pOut += 16;
380  d2b += a2 * acc8b;
381  }
382 
383  sample = blockSize & 0x7u; /* frames left over after the 8-frame passes (0 to 7) */
384  while(sample > 0u) {
385  /* Read one stereo frame */
386  Xn1a = *pIn++; //Channel a
387  Xn1b = *pIn++; //Channel b
388 
389  /* y[n] = b0 * x[n] + d1 */
390  acc1a = (b0 * Xn1a) + d1a;
391  acc1b = (b0 * Xn1b) + d1b;
392 
393  /* Store the accumulator results in the destination buffer. */
394  *pOut++ = acc1a;
395  *pOut++ = acc1b;
396 
397  /* The state is updated after every output that is computed. */
398  /* d1 = b1 * x[n] + a1 * y[n] + d2 */
399  d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
400  d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
401 
402  /* d2 = b2 * x[n] + a2 * y[n] */
403  d2a = (b2 * Xn1a) + (a2 * acc1a);
404  d2b = (b2 * Xn1b) + (a2 * acc1b);
405 
406  sample--;
407  }
408 
409  /* Store the updated state variables back into the state array */
410  pState[0] = d1a;
411  pState[1] = d2a;
412 
413  pState[2] = d1b;
414  pState[3] = d2b;
415 
416  /* The output of the current stage becomes the input of the next stage */
417  pIn = pDst;
418  /* decrement the stage counter */
419  stage--;
420 
421  pState += 4u;
422  /* Reset the output working pointer */
423  pOut = pDst;
424 
425  } while(stage > 0u);
426 
427 #elif defined(ARM_MATH_CM0_FAMILY)
428 
429  /* Run the below code for Cortex-M0: one stereo frame per loop iteration, no unrolling */
430 
431  do
432  {
433  /* Read the five coefficients of this stage */
434  b0 = *pCoeffs++;
435  b1 = *pCoeffs++;
436  b2 = *pCoeffs++;
437  a1 = *pCoeffs++;
438  a2 = *pCoeffs++;
439 
440  /* Read the state values of this stage: d1, d2 for channel a, then for channel b */
441  d1a = pState[0];
442  d2a = pState[1];
443  d1b = pState[2];
444  d2b = pState[3];
445 
446 
447  sample = blockSize;
448 
449  while(sample > 0u)
450  {
451  /* Read one stereo frame */
452  Xn1a = *pIn++; //Channel a
453  Xn1b = *pIn++; //Channel b
454 
455  /* y[n] = b0 * x[n] + d1 */
456  acc1a = (b0 * Xn1a) + d1a;
457  acc1b = (b0 * Xn1b) + d1b;
458 
459  /* Store the accumulator results in the destination buffer. */
460  *pOut++ = acc1a;
461  *pOut++ = acc1b;
462 
463  /* The state is updated after every output that is computed. */
464  /* d1 = b1 * x[n] + a1 * y[n] + d2 */
465  d1a = ((b1 * Xn1a) + (a1 * acc1a)) + d2a;
466  d1b = ((b1 * Xn1b) + (a1 * acc1b)) + d2b;
467 
468  /* d2 = b2 * x[n] + a2 * y[n] */
469  d2a = (b2 * Xn1a) + (a2 * acc1a);
470  d2b = (b2 * Xn1b) + (a2 * acc1b);
471 
472  /* decrement the frame counter */
473  sample--;
474  }
475 
476  /* Store the updated state variables back into the state array; pState ends up at the next stage's state */
477  *pState++ = d1a;
478  *pState++ = d2a;
479  *pState++ = d1b;
480  *pState++ = d2b;
481 
482  /* The output of the current stage becomes the input of the next stage */
483  pIn = pDst;
484 
485  /* Reset the output working pointer */
486  pOut = pDst;
487 
488  /* decrement the stage counter */
489  stage--;
490 
491  } while(stage > 0u);
492 
493 #else
494 
495  float32_t Xn2a, Xn3a, Xn4a; /* channel a inputs of frames 2..4 of an unrolled pass */
496  float32_t Xn2b, Xn3b, Xn4b; /* channel b inputs of frames 2..4 of an unrolled pass */
497  float32_t acc2a, acc3a, acc4a; /* channel a outputs of frames 2..4 */
498  float32_t acc2b, acc3b, acc4b; /* channel b outputs of frames 2..4 */
499  float32_t p0a, p1a, p2a, p3a, p4a, A1a; /* channel a temporaries: p0 = b0*x, p1 = b1*x, p2 = b2*x, p3 = a1*y, p4 = a2*y, A1 = p1 + p3 */
500  float32_t p0b, p1b, p2b, p3b, p4b, A1b; /* channel b temporaries, same roles as above */
501 
502  /* Run the below code for Cortex-M4 and Cortex-M3 */
503  do
504  {
505  /* Read the five coefficients of this stage */
506  b0 = *pCoeffs++;
507  b1 = *pCoeffs++;
508  b2 = *pCoeffs++;
509  a1 = *pCoeffs++;
510  a2 = *pCoeffs++;
511 
512  /* Read the state values of this stage: d1, d2 for channel a, then for channel b */
513  d1a = pState[0];
514  d2a = pState[1];
515  d1b = pState[2];
516  d2b = pState[3];
517 
518  /* Loop unrolling: each pass of the unrolled loop handles 4 stereo frames (8 interleaved values). */
519  sample = blockSize >> 2u;
520 
521  /* First part of the processing with loop unrolling. Compute 4 frames at a time.
522  ** A second loop below computes the remaining 0 to 3 frames. */
523  while(sample > 0u) {
524 
525  /* y[n] = b0 * x[n] + d1 */
526  /* d1 = b1 * x[n] + a1 * y[n] + d2 */
527  /* d2 = b2 * x[n] + a2 * y[n] */
528 
529  /* Read four stereo frames (8 interleaved values) */
530  Xn1a = pIn[0];
531  Xn1b = pIn[1];
532  Xn2a = pIn[2];
533  Xn2b = pIn[3];
534  Xn3a = pIn[4];
535  Xn3b = pIn[5];
536  Xn4a = pIn[6];
537  Xn4b = pIn[7];
538  pIn += 8;
539  /* Frame 1: output and state update; the b0 product for frame 2 is computed early */
540  p0a = b0 * Xn1a;
541  p0b = b0 * Xn1b;
542  p1a = b1 * Xn1a;
543  p1b = b1 * Xn1b;
544  acc1a = p0a + d1a;
545  acc1b = p0b + d1b;
546  p0a = b0 * Xn2a;
547  p0b = b0 * Xn2b;
548  p3a = a1 * acc1a;
549  p3b = a1 * acc1b;
550  p2a = b2 * Xn1a;
551  p2b = b2 * Xn1b;
552  A1a = p1a + p3a;
553  A1b = p1b + p3b;
554  p4a = a2 * acc1a;
555  p4b = a2 * acc1b;
556  d1a = A1a + d2a;
557  d1b = A1b + d2b;
558  d2a = p2a + p4a;
559  d2b = p2b + p4b;
560  /* Frame 2: output and state update; the b0 product for frame 3 is computed early */
561  p1a = b1 * Xn2a;
562  p1b = b1 * Xn2b;
563  acc2a = p0a + d1a;
564  acc2b = p0b + d1b;
565  p0a = b0 * Xn3a;
566  p0b = b0 * Xn3b;
567  p3a = a1 * acc2a;
568  p3b = a1 * acc2b;
569  p2a = b2 * Xn2a;
570  p2b = b2 * Xn2b;
571  A1a = p1a + p3a;
572  A1b = p1b + p3b;
573  p4a = a2 * acc2a;
574  p4b = a2 * acc2b;
575  d1a = A1a + d2a;
576  d1b = A1b + d2b;
577  d2a = p2a + p4a;
578  d2b = p2b + p4b;
579  /* Frame 3: output and state update; the b0 product for frame 4 is computed early */
580  p1a = b1 * Xn3a;
581  p1b = b1 * Xn3b;
582  acc3a = p0a + d1a;
583  acc3b = p0b + d1b;
584  p0a = b0 * Xn4a;
585  p0b = b0 * Xn4b;
586  p3a = a1 * acc3a;
587  p3b = a1 * acc3b;
588  p2a = b2 * Xn3a;
589  p2b = b2 * Xn3b;
590  A1a = p1a + p3a;
591  A1b = p1b + p3b;
592  p4a = a2 * acc3a;
593  p4b = a2 * acc3b;
594  d1a = A1a + d2a;
595  d1b = A1b + d2b;
596  d2a = p2a + p4a;
597  d2b = p2b + p4b;
598  /* Frame 4: output and state update */
599  acc4a = p0a + d1a;
600  acc4b = p0b + d1b;
601  p1a = b1 * Xn4a;
602  p1b = b1 * Xn4b;
603  p3a = a1 * acc4a;
604  p3b = a1 * acc4b;
605  p2a = b2 * Xn4a;
606  p2b = b2 * Xn4b;
607  A1a = p1a + p3a;
608  A1b = p1b + p3b;
609  p4a = a2 * acc4a;
610  p4b = a2 * acc4b;
611  d1a = A1a + d2a;
612  d1b = A1b + d2b;
613  d2a = p2a + p4a;
614  d2b = p2b + p4b;
615  /* Store the four frames of output, interleaved as channel a, channel b */
616  pOut[0] = acc1a;
617  pOut[1] = acc1b;
618  pOut[2] = acc2a;
619  pOut[3] = acc2b;
620  pOut[4] = acc3a;
621  pOut[5] = acc3b;
622  pOut[6] = acc4a;
623  pOut[7] = acc4b;
624  pOut += 8;
625 
626  sample--;
627  }
628 
629  sample = blockSize & 0x3u; /* frames left over after the 4-frame passes (0 to 3) */
630  while(sample > 0u) {
631  Xn1a = *pIn++; /* channel a input */
632  Xn1b = *pIn++; /* channel b input */
633 
634  p0a = b0 * Xn1a;
635  p0b = b0 * Xn1b;
636  p1a = b1 * Xn1a;
637  p1b = b1 * Xn1b;
638  acc1a = p0a + d1a; /* y[n] = b0 * x[n] + d1 */
639  acc1b = p0b + d1b;
640  p3a = a1 * acc1a;
641  p3b = a1 * acc1b;
642  p2a = b2 * Xn1a;
643  p2b = b2 * Xn1b;
644  A1a = p1a + p3a;
645  A1b = p1b + p3b;
646  p4a = a2 * acc1a;
647  p4b = a2 * acc1b;
648  d1a = A1a + d2a; /* d1 = b1 * x[n] + a1 * y[n] + d2 */
649  d1b = A1b + d2b;
650  d2a = p2a + p4a; /* d2 = b2 * x[n] + a2 * y[n] */
651  d2b = p2b + p4b;
652 
653  *pOut++ = acc1a;
654  *pOut++ = acc1b;
655 
656  sample--;
657  }
658 
659  /* Store the updated state variables back into the state array; pState ends up at the next stage's state */
660  *pState++ = d1a;
661  *pState++ = d2a;
662  *pState++ = d1b;
663  *pState++ = d2b;
664 
665  /* The output of the current stage becomes the input of the next stage */
666  pIn = pDst;
667 
668  /* Reset the output working pointer */
669  pOut = pDst;
670 
671  /* decrement the stage counter */
672  stage--;
673 
674  } while(stage > 0u);
675 
676 #endif
677 
678 }
679 LOW_OPTIMIZATION_EXIT
680 
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
Instance structure for the floating-point transposed direct form II Biquad cascade filter...
Definition: arm_math.h:3618
LOW_OPTIMIZATION_ENTER void arm_biquad_cascade_stereo_df2T_f32(const arm_biquad_cascade_stereo_df2T_instance_f32 *S, float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
Processing function for the floating-point transposed direct form II Biquad cascade filter...