STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_iir_lattice_q15.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_iir_lattice_q15.c
9 *
10 * Description: Q15 IIR lattice filter processing function.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
73  q15_t * pSrc,
74  q15_t * pDst,
75  uint32_t blockSize)
76 {
77 
78 
79 #ifndef ARM_MATH_CM0_FAMILY
80 
81  /* Run the below code for Cortex-M4 and Cortex-M3 */
82 
83  q31_t fcurr, fnext, gcurr = 0, gnext; /* Temporary variables for lattice stages */
84  q15_t gnext1, gnext2; /* Temporary variables for lattice stages */
85  uint32_t stgCnt; /* Temporary variables for counts */
86  q63_t acc; /* Accumulator */
87  uint32_t blkCnt, tapCnt; /* Temporary variables for counts */
88  q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */
89  uint32_t numStages = S->numStages; /* number of stages */
90  q15_t *pState; /* State pointer */
91  q15_t *pStateCurnt; /* State current pointer */
92  q15_t out; /* Temporary variable for output */
93  q31_t v; /* Temporary variable for ladder coefficient */
94 #ifdef UNALIGNED_SUPPORT_DISABLE
95  q15_t v1, v2;
96 #endif
97 
98 
99  blkCnt = blockSize;
100 
101  pState = &S->pState[0];
102 
103  /* Sample processing */
104  while(blkCnt > 0u)
105  {
106  /* Read Sample from input buffer */
107  /* fN(n) = x(n) */
108  fcurr = *pSrc++;
109 
110  /* Initialize state read pointer */
111  px1 = pState;
112  /* Initialize state write pointer */
113  px2 = pState;
114  /* Set accumulator to zero */
115  acc = 0;
116  /* Initialize Ladder coeff pointer */
117  pv = &S->pvCoeffs[0];
118  /* Initialize Reflection coeff pointer */
119  pk = &S->pkCoeffs[0];
120 
121 
122  /* Process sample for first tap */
123  gcurr = *px1++;
124  /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
125  fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
126  fnext = __SSAT(fnext, 16);
127  /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
128  gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
129  gnext = __SSAT(gnext, 16);
130  /* write gN(n) into state for next sample processing */
131  *px2++ = (q15_t) gnext;
132  /* y(n) += gN(n) * vN */
133  acc += (q31_t) ((gnext * (*pv++)));
134 
135 
136  /* Update f values for next coefficient processing */
137  fcurr = fnext;
138 
139  /* Loop unrolling. Process 4 taps at a time. */
140  tapCnt = (numStages - 1u) >> 2;
141 
142  while(tapCnt > 0u)
143  {
144 
145  /* Process sample for 2nd, 6th ...taps */
146  /* Read gN-2(n-1) from state buffer */
147  gcurr = *px1++;
148  /* Process sample for 2nd, 6th .. taps */
149  /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
150  fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
151  fnext = __SSAT(fnext, 16);
152  /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
153  gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
154  gnext1 = (q15_t) __SSAT(gnext, 16);
155  /* write gN-1(n) into state */
156  *px2++ = (q15_t) gnext1;
157 
158 
159  /* Process sample for 3rd, 7th ...taps */
160  /* Read gN-3(n-1) from state */
161  gcurr = *px1++;
162  /* Process sample for 3rd, 7th .. taps */
163  /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
164  fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
165  fcurr = __SSAT(fcurr, 16);
166  /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
167  gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
168  gnext2 = (q15_t) __SSAT(gnext, 16);
169  /* write gN-2(n) into state */
170  *px2++ = (q15_t) gnext2;
171 
172  /* Read vN-1 and vN-2 at a time */
173 #ifndef UNALIGNED_SUPPORT_DISABLE
174 
175  v = *__SIMD32(pv)++;
176 
177 #else
178 
179  v1 = *pv++;
180  v2 = *pv++;
181 
182 #ifndef ARM_MATH_BIG_ENDIAN
183 
184  v = __PKHBT(v1, v2, 16);
185 
186 #else
187 
188  v = __PKHBT(v2, v1, 16);
189 
190 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
191 
192 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
193 
194 
195  /* Pack gN-1(n) and gN-2(n) */
196 
197 #ifndef ARM_MATH_BIG_ENDIAN
198 
199  gnext = __PKHBT(gnext1, gnext2, 16);
200 
201 #else
202 
203  gnext = __PKHBT(gnext2, gnext1, 16);
204 
205 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
206 
207  /* y(n) += gN-1(n) * vN-1 */
208  /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
209  /* y(n) += gN-2(n) * vN-2 */
210  /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
211  acc = __SMLALD(gnext, v, acc);
212 
213 
214  /* Process sample for 4th, 8th ...taps */
215  /* Read gN-4(n-1) from state */
216  gcurr = *px1++;
217  /* Process sample for 4th, 8th .. taps */
218  /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
219  fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
220  fnext = __SSAT(fnext, 16);
221  /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
222  gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
223  gnext1 = (q15_t) __SSAT(gnext, 16);
224  /* write gN-3(n) for the next sample process */
225  *px2++ = (q15_t) gnext1;
226 
227 
228  /* Process sample for 5th, 9th ...taps */
229  /* Read gN-5(n-1) from state */
230  gcurr = *px1++;
231  /* Process sample for 5th, 9th .. taps */
232  /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
233  fcurr = fnext - (((q31_t) gcurr * (*pk)) >> 15);
234  fcurr = __SSAT(fcurr, 16);
235  /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
236  gnext = (((q31_t) fcurr * (*pk++)) >> 15) + gcurr;
237  gnext2 = (q15_t) __SSAT(gnext, 16);
238  /* write gN-4(n) for the next sample process */
239  *px2++ = (q15_t) gnext2;
240 
241  /* Read vN-3 and vN-4 at a time */
242 #ifndef UNALIGNED_SUPPORT_DISABLE
243 
244  v = *__SIMD32(pv)++;
245 
246 #else
247 
248  v1 = *pv++;
249  v2 = *pv++;
250 
251 #ifndef ARM_MATH_BIG_ENDIAN
252 
253  v = __PKHBT(v1, v2, 16);
254 
255 #else
256 
257  v = __PKHBT(v2, v1, 16);
258 
259 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
260 
261 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
262 
263 
264  /* Pack gN-3(n) and gN-4(n) */
265 #ifndef ARM_MATH_BIG_ENDIAN
266 
267  gnext = __PKHBT(gnext1, gnext2, 16);
268 
269 #else
270 
271  gnext = __PKHBT(gnext2, gnext1, 16);
272 
273 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
274 
275  /* y(n) += gN-4(n) * vN-4 */
276  /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
277  /* y(n) += gN-3(n) * vN-3 */
278  /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
279  acc = __SMLALD(gnext, v, acc);
280 
281  tapCnt--;
282 
283  }
284 
285  fnext = fcurr;
286 
287  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
288  tapCnt = (numStages - 1u) % 0x4u;
289 
290  while(tapCnt > 0u)
291  {
292  gcurr = *px1++;
293  /* Process sample for last taps */
294  fnext = fcurr - (((q31_t) gcurr * (*pk)) >> 15);
295  fnext = __SSAT(fnext, 16);
296  gnext = (((q31_t) fnext * (*pk++)) >> 15) + gcurr;
297  gnext = __SSAT(gnext, 16);
298  /* Output samples for last taps */
299  acc += (q31_t) (((q31_t) gnext * (*pv++)));
300  *px2++ = (q15_t) gnext;
301  fcurr = fnext;
302 
303  tapCnt--;
304  }
305 
306  /* y(n) += g0(n) * v0 */
307  acc += (q31_t) (((q31_t) fnext * (*pv++)));
308 
309  out = (q15_t) __SSAT(acc >> 15, 16);
310  *px2++ = (q15_t) fnext;
311 
312  /* write out into pDst */
313  *pDst++ = out;
314 
315  /* Advance the state pointer by 1 to process the next sample */
316  pState = pState + 1u;
317  blkCnt--;
318 
319  }
320 
321  /* Processing is complete. Now copy last S->numStages samples to start of the buffer
322  for the preparation of next frame process */
323  /* Points to the start of the state buffer */
324  pStateCurnt = &S->pState[0];
325  pState = &S->pState[blockSize];
326 
327  stgCnt = (numStages >> 2u);
328 
329  /* copy data */
330  while(stgCnt > 0u)
331  {
332 #ifndef UNALIGNED_SUPPORT_DISABLE
333 
334  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
335  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
336 
337 #else
338 
339  *pStateCurnt++ = *pState++;
340  *pStateCurnt++ = *pState++;
341  *pStateCurnt++ = *pState++;
342  *pStateCurnt++ = *pState++;
343 
344 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
345 
346  /* Decrement the loop counter */
347  stgCnt--;
348 
349  }
350 
351  /* Calculation of count for remaining q15_t data */
352  stgCnt = (numStages) % 0x4u;
353 
354  /* copy data */
355  while(stgCnt > 0u)
356  {
357  *pStateCurnt++ = *pState++;
358 
359  /* Decrement the loop counter */
360  stgCnt--;
361  }
362 
363 #else
364 
365  /* Run the below code for Cortex-M0 */
366 
367  q31_t fcurr, fnext = 0, gcurr = 0, gnext; /* Temporary variables for lattice stages */
368  uint32_t stgCnt; /* Temporary variables for counts */
369  q63_t acc; /* Accumulator */
370  uint32_t blkCnt, tapCnt; /* Temporary variables for counts */
371  q15_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */
372  uint32_t numStages = S->numStages; /* number of stages */
373  q15_t *pState; /* State pointer */
374  q15_t *pStateCurnt; /* State current pointer */
375  q15_t out; /* Temporary variable for output */
376 
377 
378  blkCnt = blockSize;
379 
380  pState = &S->pState[0];
381 
382  /* Sample processing */
383  while(blkCnt > 0u)
384  {
385  /* Read Sample from input buffer */
386  /* fN(n) = x(n) */
387  fcurr = *pSrc++;
388 
389  /* Initialize state read pointer */
390  px1 = pState;
391  /* Initialize state write pointer */
392  px2 = pState;
393  /* Set accumulator to zero */
394  acc = 0;
395  /* Initialize Ladder coeff pointer */
396  pv = &S->pvCoeffs[0];
397  /* Initialize Reflection coeff pointer */
398  pk = &S->pkCoeffs[0];
399 
400  tapCnt = numStages;
401 
402  while(tapCnt > 0u)
403  {
404  gcurr = *px1++;
405  /* Process sample */
406  /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
407  fnext = fcurr - ((gcurr * (*pk)) >> 15);
408  fnext = __SSAT(fnext, 16);
409  /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
410  gnext = ((fnext * (*pk++)) >> 15) + gcurr;
411  gnext = __SSAT(gnext, 16);
412  /* Output samples */
413  /* y(n) += gN(n) * vN */
414  acc += (q31_t) ((gnext * (*pv++)));
415  /* write gN(n) into state for next sample processing */
416  *px2++ = (q15_t) gnext;
417  /* Update f values for next coefficient processing */
418  fcurr = fnext;
419 
420  tapCnt--;
421  }
422 
423  /* y(n) += g0(n) * v0 */
424  acc += (q31_t) ((fnext * (*pv++)));
425 
426  out = (q15_t) __SSAT(acc >> 15, 16);
427  *px2++ = (q15_t) fnext;
428 
429  /* write out into pDst */
430  *pDst++ = out;
431 
432  /* Advance the state pointer by 1 to process the next group of samples */
433  pState = pState + 1u;
434  blkCnt--;
435 
436  }
437 
438  /* Processing is complete. Now copy last S->numStages samples to start of the buffer
439  for the preparation of next frame process */
440  /* Points to the start of the state buffer */
441  pStateCurnt = &S->pState[0];
442  pState = &S->pState[blockSize];
443 
444  stgCnt = numStages;
445 
446  /* copy data */
447  while(stgCnt > 0u)
448  {
449  *pStateCurnt++ = *pState++;
450 
451  /* Decrement the loop counter */
452  stgCnt--;
453  }
454 
455 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
456 
457 }
458 
459 
460 
461 
void arm_iir_lattice_q15(const arm_iir_lattice_instance_q15 *S, q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Processing function for the Q15 IIR lattice filter.
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
Instance structure for the Q15 IIR lattice filter.
Definition: arm_math.h:3838
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397