STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_biquad_cascade_df1_32x64_q31.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. October 2015
5 * $Revision: V.1.4.5 a
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_biquad_cascade_df1_32x64_q31.c
9 *
10 * Description: High precision Q31 Biquad cascade filter processing function
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
189  q31_t * pSrc,
190  q31_t * pDst,
191  uint32_t blockSize)
192 {
193  q31_t *pIn = pSrc; /* input pointer initialization */
194  q31_t *pOut = pDst; /* output pointer initialization */
195  q63_t *pState = S->pState; /* state pointer initialization */
196  q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
197  q63_t acc; /* accumulator */
198  q31_t Xn1, Xn2; /* Input Filter state variables */
199  q63_t Yn1, Yn2; /* Output Filter state variables */
200  q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
201  q31_t Xn; /* temporary input */
202  int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
203  uint32_t sample, stage = S->numStages; /* loop counters */
204  q31_t acc_l, acc_h; /* temporary output */
205  uint32_t uShift = ((uint32_t) S->postShift + 1u);
206  uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
207 
208 
209 #ifndef ARM_MATH_CM0_FAMILY
210 
211  /* Run the below code for Cortex-M4 and Cortex-M3 */
212 
213  do
214  {
215  /* Reading the coefficients */
216  b0 = *pCoeffs++;
217  b1 = *pCoeffs++;
218  b2 = *pCoeffs++;
219  a1 = *pCoeffs++;
220  a2 = *pCoeffs++;
221 
222  /* Reading the state values */
223  Xn1 = (q31_t) (pState[0]);
224  Xn2 = (q31_t) (pState[1]);
225  Yn1 = pState[2];
226  Yn2 = pState[3];
227 
228  /* Apply loop unrolling and compute 4 output values simultaneously. */
229  /* The variable acc hold output value that is being computed and
230  * stored in the destination buffer
231  * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
232  */
233 
234  sample = blockSize >> 2u;
235 
236  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
237  ** a second loop below computes the remaining 1 to 3 samples. */
238  while(sample > 0u)
239  {
240  /* Read the input */
241  Xn = *pIn++;
242 
243  /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
244 
245  /* acc = b0 * x[n] */
246  acc = (q63_t) Xn *b0;
247 
248  /* acc += b1 * x[n-1] */
249  acc += (q63_t) Xn1 *b1;
250 
251  /* acc += b[2] * x[n-2] */
252  acc += (q63_t) Xn2 *b2;
253 
254  /* acc += a1 * y[n-1] */
255  acc += mult32x64(Yn1, a1);
256 
257  /* acc += a2 * y[n-2] */
258  acc += mult32x64(Yn2, a2);
259 
260  /* The result is converted to 1.63 , Yn2 variable is reused */
261  Yn2 = acc << shift;
262 
263  /* Calc lower part of acc */
264  acc_l = acc & 0xffffffff;
265 
266  /* Calc upper part of acc */
267  acc_h = (acc >> 32) & 0xffffffff;
268 
269  /* Apply shift for lower part of acc and upper part of acc */
270  acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
271 
272  /* Store the output in the destination buffer in 1.31 format. */
273  *pOut = acc_h;
274 
275  /* Read the second input into Xn2, to reuse the value */
276  Xn2 = *pIn++;
277 
278  /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
279 
280  /* acc += b1 * x[n-1] */
281  acc = (q63_t) Xn *b1;
282 
283  /* acc = b0 * x[n] */
284  acc += (q63_t) Xn2 *b0;
285 
286  /* acc += b[2] * x[n-2] */
287  acc += (q63_t) Xn1 *b2;
288 
289  /* acc += a1 * y[n-1] */
290  acc += mult32x64(Yn2, a1);
291 
292  /* acc += a2 * y[n-2] */
293  acc += mult32x64(Yn1, a2);
294 
295  /* The result is converted to 1.63, Yn1 variable is reused */
296  Yn1 = acc << shift;
297 
298  /* Calc lower part of acc */
299  acc_l = acc & 0xffffffff;
300 
301  /* Calc upper part of acc */
302  acc_h = (acc >> 32) & 0xffffffff;
303 
304  /* Apply shift for lower part of acc and upper part of acc */
305  acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
306 
307  /* Read the third input into Xn1, to reuse the value */
308  Xn1 = *pIn++;
309 
310  /* The result is converted to 1.31 */
311  /* Store the output in the destination buffer. */
312  *(pOut + 1u) = acc_h;
313 
314  /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
315 
316  /* acc = b0 * x[n] */
317  acc = (q63_t) Xn1 *b0;
318 
319  /* acc += b1 * x[n-1] */
320  acc += (q63_t) Xn2 *b1;
321 
322  /* acc += b[2] * x[n-2] */
323  acc += (q63_t) Xn *b2;
324 
325  /* acc += a1 * y[n-1] */
326  acc += mult32x64(Yn1, a1);
327 
328  /* acc += a2 * y[n-2] */
329  acc += mult32x64(Yn2, a2);
330 
331  /* The result is converted to 1.63, Yn2 variable is reused */
332  Yn2 = acc << shift;
333 
334  /* Calc lower part of acc */
335  acc_l = acc & 0xffffffff;
336 
337  /* Calc upper part of acc */
338  acc_h = (acc >> 32) & 0xffffffff;
339 
340  /* Apply shift for lower part of acc and upper part of acc */
341  acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
342 
343  /* Store the output in the destination buffer in 1.31 format. */
344  *(pOut + 2u) = acc_h;
345 
346  /* Read the fourth input into Xn, to reuse the value */
347  Xn = *pIn++;
348 
349  /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
350  /* acc = b0 * x[n] */
351  acc = (q63_t) Xn *b0;
352 
353  /* acc += b1 * x[n-1] */
354  acc += (q63_t) Xn1 *b1;
355 
356  /* acc += b[2] * x[n-2] */
357  acc += (q63_t) Xn2 *b2;
358 
359  /* acc += a1 * y[n-1] */
360  acc += mult32x64(Yn2, a1);
361 
362  /* acc += a2 * y[n-2] */
363  acc += mult32x64(Yn1, a2);
364 
365  /* The result is converted to 1.63, Yn1 variable is reused */
366  Yn1 = acc << shift;
367 
368  /* Calc lower part of acc */
369  acc_l = acc & 0xffffffff;
370 
371  /* Calc upper part of acc */
372  acc_h = (acc >> 32) & 0xffffffff;
373 
374  /* Apply shift for lower part of acc and upper part of acc */
375  acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
376 
377  /* Store the output in the destination buffer in 1.31 format. */
378  *(pOut + 3u) = acc_h;
379 
380  /* Every time after the output is computed state should be updated. */
381  /* The states should be updated as: */
382  /* Xn2 = Xn1 */
383  /* Xn1 = Xn */
384  /* Yn2 = Yn1 */
385  /* Yn1 = acc */
386  Xn2 = Xn1;
387  Xn1 = Xn;
388 
389  /* update output pointer */
390  pOut += 4u;
391 
392  /* decrement the loop counter */
393  sample--;
394  }
395 
396  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
397  ** No loop unrolling is used. */
398  sample = (blockSize & 0x3u);
399 
400  while(sample > 0u)
401  {
402  /* Read the input */
403  Xn = *pIn++;
404 
405  /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
406 
407  /* acc = b0 * x[n] */
408  acc = (q63_t) Xn *b0;
409  /* acc += b1 * x[n-1] */
410  acc += (q63_t) Xn1 *b1;
411  /* acc += b[2] * x[n-2] */
412  acc += (q63_t) Xn2 *b2;
413  /* acc += a1 * y[n-1] */
414  acc += mult32x64(Yn1, a1);
415  /* acc += a2 * y[n-2] */
416  acc += mult32x64(Yn2, a2);
417 
418  /* Every time after the output is computed state should be updated. */
419  /* The states should be updated as: */
420  /* Xn2 = Xn1 */
421  /* Xn1 = Xn */
422  /* Yn2 = Yn1 */
423  /* Yn1 = acc */
424  Xn2 = Xn1;
425  Xn1 = Xn;
426  Yn2 = Yn1;
427  /* The result is converted to 1.63, Yn1 variable is reused */
428  Yn1 = acc << shift;
429 
430  /* Calc lower part of acc */
431  acc_l = acc & 0xffffffff;
432 
433  /* Calc upper part of acc */
434  acc_h = (acc >> 32) & 0xffffffff;
435 
436  /* Apply shift for lower part of acc and upper part of acc */
437  acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
438 
439  /* Store the output in the destination buffer in 1.31 format. */
440  *pOut++ = acc_h;
441  /* Yn1 = acc << shift; */
442 
443  /* Store the output in the destination buffer in 1.31 format. */
444 /* *pOut++ = (q31_t) (acc >> (32 - shift)); */
445 
446  /* decrement the loop counter */
447  sample--;
448  }
449 
450  /* The first stage output is given as input to the second stage. */
451  pIn = pDst;
452 
453  /* Reset to destination buffer working pointer */
454  pOut = pDst;
455 
456  /* Store the updated state variables back into the pState array */
457  /* Store the updated state variables back into the pState array */
458  *pState++ = (q63_t) Xn1;
459  *pState++ = (q63_t) Xn2;
460  *pState++ = Yn1;
461  *pState++ = Yn2;
462 
463  } while(--stage);
464 
465 #else
466 
467  /* Run the below code for Cortex-M0 */
468 
469  do
470  {
471  /* Reading the coefficients */
472  b0 = *pCoeffs++;
473  b1 = *pCoeffs++;
474  b2 = *pCoeffs++;
475  a1 = *pCoeffs++;
476  a2 = *pCoeffs++;
477 
478  /* Reading the state values */
479  Xn1 = pState[0];
480  Xn2 = pState[1];
481  Yn1 = pState[2];
482  Yn2 = pState[3];
483 
484  /* The variable acc hold output value that is being computed and
485  * stored in the destination buffer
486  * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
487  */
488 
489  sample = blockSize;
490 
491  while(sample > 0u)
492  {
493  /* Read the input */
494  Xn = *pIn++;
495 
496  /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
497  /* acc = b0 * x[n] */
498  acc = (q63_t) Xn *b0;
499  /* acc += b1 * x[n-1] */
500  acc += (q63_t) Xn1 *b1;
501  /* acc += b[2] * x[n-2] */
502  acc += (q63_t) Xn2 *b2;
503  /* acc += a1 * y[n-1] */
504  acc += mult32x64(Yn1, a1);
505  /* acc += a2 * y[n-2] */
506  acc += mult32x64(Yn2, a2);
507 
508  /* Every time after the output is computed state should be updated. */
509  /* The states should be updated as: */
510  /* Xn2 = Xn1 */
511  /* Xn1 = Xn */
512  /* Yn2 = Yn1 */
513  /* Yn1 = acc */
514  Xn2 = Xn1;
515  Xn1 = Xn;
516  Yn2 = Yn1;
517 
518  /* The result is converted to 1.63, Yn1 variable is reused */
519  Yn1 = acc << shift;
520 
521  /* Calc lower part of acc */
522  acc_l = acc & 0xffffffff;
523 
524  /* Calc upper part of acc */
525  acc_h = (acc >> 32) & 0xffffffff;
526 
527  /* Apply shift for lower part of acc and upper part of acc */
528  acc_h = (uint32_t) acc_l >> lShift | acc_h << uShift;
529 
530  /* Store the output in the destination buffer in 1.31 format. */
531  *pOut++ = acc_h;
532 
533  /* Yn1 = acc << shift; */
534 
535  /* Store the output in the destination buffer in 1.31 format. */
536  /* *pOut++ = (q31_t) (acc >> (32 - shift)); */
537 
538  /* decrement the loop counter */
539  sample--;
540  }
541 
542  /* The first stage output is given as input to the second stage. */
543  pIn = pDst;
544 
545  /* Reset to destination buffer working pointer */
546  pOut = pDst;
547 
548  /* Store the updated state variables back into the pState array */
549  *pState++ = (q63_t) Xn1;
550  *pState++ = (q63_t) Xn2;
551  *pState++ = Yn1;
552  *pState++ = Yn2;
553 
554  } while(--stage);
555 
556 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
557 }
558 
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397
Instance structure for the high precision Q31 Biquad cascade filter.
Definition: arm_math.h:3568
void arm_biquad_cas_df1_32x64_q31(const arm_biquad_cas_df1_32x64_ins_q31 *S, q31_t *pSrc, q31_t *pDst, uint32_t blockSize)