STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_lattice_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_lattice_f32.c
9 *
10 * Description: Processing function for the floating-point FIR Lattice filter.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
133  float32_t * pSrc,
134  float32_t * pDst,
135  uint32_t blockSize)
136 {
137  float32_t *pState; /* State pointer */
138  float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
139  float32_t *px; /* temporary state pointer */
140  float32_t *pk; /* temporary coefficient pointer */
141 
142 
143 #ifndef ARM_MATH_CM0_FAMILY
144 
145  /* Run the below code for Cortex-M4 and Cortex-M3 */
146 
147  float32_t fcurr1, fnext1, gcurr1, gnext1; /* temporary variables for first sample in loop unrolling */
148  float32_t fcurr2, fnext2, gnext2; /* temporary variables for second sample in loop unrolling */
149  float32_t fcurr3, fnext3, gnext3; /* temporary variables for third sample in loop unrolling */
150  float32_t fcurr4, fnext4, gnext4; /* temporary variables for fourth sample in loop unrolling */
151  uint32_t numStages = S->numStages; /* Number of stages in the filter */
152  uint32_t blkCnt, stageCnt; /* temporary variables for counts */
153 
154  gcurr1 = 0.0f;
155  pState = &S->pState[0];
156 
157  blkCnt = blockSize >> 2;
158 
159  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
160  A second loop below computes the remaining 1 to 3 samples. */
161  while(blkCnt > 0u)
162  {
163 
164  /* Read two samples from input buffer */
165  /* f0(n) = x(n) */
166  fcurr1 = *pSrc++;
167  fcurr2 = *pSrc++;
168 
169  /* Initialize coeff pointer */
170  pk = (pCoeffs);
171 
172  /* Initialize state pointer */
173  px = pState;
174 
175  /* Read g0(n-1) from state */
176  gcurr1 = *px;
177 
178  /* Process first sample for first tap */
179  /* f1(n) = f0(n) + K1 * g0(n-1) */
180  fnext1 = fcurr1 + ((*pk) * gcurr1);
181  /* g1(n) = f0(n) * K1 + g0(n-1) */
182  gnext1 = (fcurr1 * (*pk)) + gcurr1;
183 
184  /* Process second sample for first tap */
185  /* for sample 2 processing */
186  fnext2 = fcurr2 + ((*pk) * fcurr1);
187  gnext2 = (fcurr2 * (*pk)) + fcurr1;
188 
189  /* Read next two samples from input buffer */
190  /* f0(n+2) = x(n+2) */
191  fcurr3 = *pSrc++;
192  fcurr4 = *pSrc++;
193 
194  /* Copy only the last input sample into the state buffer;
195  it is read back as g0(n-1) when the next samples are processed */
196  *px++ = fcurr4;
197 
198  /* Process third sample for first tap */
199  fnext3 = fcurr3 + ((*pk) * fcurr2);
200  gnext3 = (fcurr3 * (*pk)) + fcurr2;
201 
202  /* Process fourth sample for first tap */
203  fnext4 = fcurr4 + ((*pk) * fcurr3);
204  gnext4 = (fcurr4 * (*pk++)) + fcurr3;
205 
206  /* Update of f values for next coefficient set processing */
207  fcurr1 = fnext1;
208  fcurr2 = fnext2;
209  fcurr3 = fnext3;
210  fcurr4 = fnext4;
211 
212  /* Loop unrolling. Process 4 taps at a time . */
213  stageCnt = (numStages - 1u) >> 2u;
214 
215  /* Loop over the number of taps. Unroll by a factor of 4.
216  ** Repeat until we've computed numStages-3 coefficients. */
217 
218  /* Process 2nd, 3rd, 4th and 5th taps ... here */
219  while(stageCnt > 0u)
220  {
221  /* Read g1(n-1), g5(n-1) .... from state */
222  gcurr1 = *px;
223 
224  /* save g1(n) in state buffer */
225  *px++ = gnext4;
226 
227  /* Process first sample for 2nd, 6th .. tap */
228  /* Sample processing for K2, K6.... */
229  /* f2(n) = f1(n) + K2 * g1(n-1) */
230  fnext1 = fcurr1 + ((*pk) * gcurr1);
231  /* Process second sample for 2nd, 6th .. tap */
232  /* for sample 2 processing */
233  fnext2 = fcurr2 + ((*pk) * gnext1);
234  /* Process third sample for 2nd, 6th .. tap */
235  fnext3 = fcurr3 + ((*pk) * gnext2);
236  /* Process fourth sample for 2nd, 6th .. tap */
237  fnext4 = fcurr4 + ((*pk) * gnext3);
238 
239  /* g2(n) = f1(n) * K2 + g1(n-1) */
240  /* Calculation of state values for next stage */
241  gnext4 = (fcurr4 * (*pk)) + gnext3;
242  gnext3 = (fcurr3 * (*pk)) + gnext2;
243  gnext2 = (fcurr2 * (*pk)) + gnext1;
244  gnext1 = (fcurr1 * (*pk++)) + gcurr1;
245 
246 
247  /* Read g2(n-1), g6(n-1) .... from state */
248  gcurr1 = *px;
249 
250  /* save g2(n) in state buffer */
251  *px++ = gnext4;
252 
253  /* Sample processing for K3, K7.... */
254  /* Process first sample for 3rd, 7th .. tap */
255  /* f3(n) = f2(n) + K3 * g2(n-1) */
256  fcurr1 = fnext1 + ((*pk) * gcurr1);
257  /* Process second sample for 3rd, 7th .. tap */
258  fcurr2 = fnext2 + ((*pk) * gnext1);
259  /* Process third sample for 3rd, 7th .. tap */
260  fcurr3 = fnext3 + ((*pk) * gnext2);
261  /* Process fourth sample for 3rd, 7th .. tap */
262  fcurr4 = fnext4 + ((*pk) * gnext3);
263 
264  /* Calculation of state values for next stage */
265  /* g3(n) = f2(n) * K3 + g2(n-1) */
266  gnext4 = (fnext4 * (*pk)) + gnext3;
267  gnext3 = (fnext3 * (*pk)) + gnext2;
268  gnext2 = (fnext2 * (*pk)) + gnext1;
269  gnext1 = (fnext1 * (*pk++)) + gcurr1;
270 
271 
272  /* Read g3(n-1), g7(n-1) .... from state */
273  gcurr1 = *px;
274 
275  /* save g3(n) in state buffer */
276  *px++ = gnext4;
277 
278  /* Sample processing for K4, K8.... */
279  /* Process first sample for 4th, 8th .. tap */
280  /* f4(n) = f3(n) + K4 * g3(n-1) */
281  fnext1 = fcurr1 + ((*pk) * gcurr1);
282  /* Process second sample for 4th, 8th .. tap */
283  /* for sample 2 processing */
284  fnext2 = fcurr2 + ((*pk) * gnext1);
285  /* Process third sample for 4th, 8th .. tap */
286  fnext3 = fcurr3 + ((*pk) * gnext2);
287  /* Process fourth sample for 4th, 8th .. tap */
288  fnext4 = fcurr4 + ((*pk) * gnext3);
289 
290  /* g4(n) = f3(n) * K4 + g3(n-1) */
291  /* Calculation of state values for next stage */
292  gnext4 = (fcurr4 * (*pk)) + gnext3;
293  gnext3 = (fcurr3 * (*pk)) + gnext2;
294  gnext2 = (fcurr2 * (*pk)) + gnext1;
295  gnext1 = (fcurr1 * (*pk++)) + gcurr1;
296 
297  /* Read g4(n-1), g8(n-1) .... from state */
298  gcurr1 = *px;
299 
300  /* save g4(n) in state buffer */
301  *px++ = gnext4;
302 
303  /* Sample processing for K5, K9.... */
304  /* Process first sample for 5th, 9th .. tap */
305  /* f5(n) = f4(n) + K5 * g4(n-1) */
306  fcurr1 = fnext1 + ((*pk) * gcurr1);
307  /* Process second sample for 5th, 9th .. tap */
308  fcurr2 = fnext2 + ((*pk) * gnext1);
309  /* Process third sample for 5th, 9th .. tap */
310  fcurr3 = fnext3 + ((*pk) * gnext2);
311  /* Process fourth sample for 5th, 9th .. tap */
312  fcurr4 = fnext4 + ((*pk) * gnext3);
313 
314  /* Calculation of state values for next stage */
315  /* g5(n) = f4(n) * K5 + g4(n-1) */
316  gnext4 = (fnext4 * (*pk)) + gnext3;
317  gnext3 = (fnext3 * (*pk)) + gnext2;
318  gnext2 = (fnext2 * (*pk)) + gnext1;
319  gnext1 = (fnext1 * (*pk++)) + gcurr1;
320 
321  stageCnt--;
322  }
323 
324  /* If the (filter length -1) is not a multiple of 4, compute the remaining filter taps */
325  stageCnt = (numStages - 1u) % 0x4u;
326 
327  while(stageCnt > 0u)
328  {
329  gcurr1 = *px;
330 
331  /* save g value in state buffer */
332  *px++ = gnext4;
333 
334  /* Process four samples for last three taps here */
335  fnext1 = fcurr1 + ((*pk) * gcurr1);
336  fnext2 = fcurr2 + ((*pk) * gnext1);
337  fnext3 = fcurr3 + ((*pk) * gnext2);
338  fnext4 = fcurr4 + ((*pk) * gnext3);
339 
340  /* gk(n) = f(k-1)(n) * Kk + g(k-1)(n-1) for the current tap k */
341  gnext4 = (fcurr4 * (*pk)) + gnext3;
342  gnext3 = (fcurr3 * (*pk)) + gnext2;
343  gnext2 = (fcurr2 * (*pk)) + gnext1;
344  gnext1 = (fcurr1 * (*pk++)) + gcurr1;
345 
346  /* Update of f values for next coefficient set processing */
347  fcurr1 = fnext1;
348  fcurr2 = fnext2;
349  fcurr3 = fnext3;
350  fcurr4 = fnext4;
351 
352  stageCnt--;
353 
354  }
355 
356  /* The results in the 4 accumulators, store in the destination buffer. */
357  /* y(n) = fN(n) */
358  *pDst++ = fcurr1;
359  *pDst++ = fcurr2;
360  *pDst++ = fcurr3;
361  *pDst++ = fcurr4;
362 
363  blkCnt--;
364  }
365 
366  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
367  ** No loop unrolling is used. */
368  blkCnt = blockSize % 0x4u;
369 
370  while(blkCnt > 0u)
371  {
372  /* f0(n) = x(n) */
373  fcurr1 = *pSrc++;
374 
375  /* Initialize coeff pointer */
376  pk = (pCoeffs);
377 
378  /* Initialize state pointer */
379  px = pState;
380 
381  /* read g0(n-1) from state buffer */
382  gcurr1 = *px;
383 
384  /* for sample 1 processing */
385  /* f1(n) = f0(n) + K1 * g0(n-1) */
386  fnext1 = fcurr1 + ((*pk) * gcurr1);
387  /* g1(n) = f0(n) * K1 + g0(n-1) */
388  gnext1 = (fcurr1 * (*pk++)) + gcurr1;
389 
390  /* save f0(n) in state buffer */
391  *px++ = fcurr1;
392 
393  /* f1(n) is saved in fcurr1
394  for next stage processing */
395  fcurr1 = fnext1;
396 
397  stageCnt = (numStages - 1u);
398 
399  /* stage loop */
400  while(stageCnt > 0u)
401  {
402  /* read g1(n-1), g2(n-1) .... from state buffer */
403  gcurr1 = *px;
404 
405  /* save g1(n) in state buffer */
406  *px++ = gnext1;
407 
408  /* Sample processing for K2, K3.... */
409  /* f2(n) = f1(n) + K2 * g1(n-1) */
410  fnext1 = fcurr1 + ((*pk) * gcurr1);
411  /* g2(n) = f1(n) * K2 + g1(n-1) */
412  gnext1 = (fcurr1 * (*pk++)) + gcurr1;
413 
414  /* f1(n) is saved in fcurr1
415  for next stage processing */
416  fcurr1 = fnext1;
417 
418  stageCnt--;
419 
420  }
421 
422  /* y(n) = fN(n) */
423  *pDst++ = fcurr1;
424 
425  blkCnt--;
426 
427  }
428 
429 #else
430 
431  /* Run the below code for Cortex-M0 */
432 
433  float32_t fcurr, fnext, gcurr, gnext; /* temporary variables */
434  uint32_t numStages = S->numStages; /* Length of the filter */
435  uint32_t blkCnt, stageCnt; /* temporary variables for counts */
436 
437  pState = &S->pState[0];
438 
439  blkCnt = blockSize;
440 
441  while(blkCnt > 0u)
442  {
443  /* f0(n) = x(n) */
444  fcurr = *pSrc++;
445 
446  /* Initialize coeff pointer */
447  pk = pCoeffs;
448 
449  /* Initialize state pointer */
450  px = pState;
451 
452  /* read g0(n-1) from state buffer */
453  gcurr = *px;
454 
455  /* for sample 1 processing */
456  /* f1(n) = f0(n) + K1 * g0(n-1) */
457  fnext = fcurr + ((*pk) * gcurr);
458  /* g1(n) = f0(n) * K1 + g0(n-1) */
459  gnext = (fcurr * (*pk++)) + gcurr;
460 
461  /* save f0(n) in state buffer */
462  *px++ = fcurr;
463 
464  /* f1(n) is saved in fcurr
465  for next stage processing */
466  fcurr = fnext;
467 
468  stageCnt = (numStages - 1u);
469 
470  /* stage loop */
471  while(stageCnt > 0u)
472  {
473  /* read g1(n-1), g2(n-1) .... from state buffer */
474  gcurr = *px;
475 
476  /* save g1(n) in state buffer */
477  *px++ = gnext;
478 
479  /* Sample processing for K2, K3.... */
480  /* f2(n) = f1(n) + K2 * g1(n-1) */
481  fnext = fcurr + ((*pk) * gcurr);
482  /* g2(n) = f1(n) * K2 + g1(n-1) */
483  gnext = (fcurr * (*pk++)) + gcurr;
484 
485  /* f1(n) is saved in fcurr
486  for next stage processing */
487  fcurr = fnext;
488 
489  stageCnt--;
490 
491  }
492 
493  /* y(n) = fN(n) */
494  *pDst++ = fcurr;
495 
496  blkCnt--;
497 
498  }
499 
500 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
501 
502 }
503 
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
Instance structure for the floating-point FIR lattice filter.
Definition: arm_math.h:3743
void arm_fir_lattice_f32(const arm_fir_lattice_instance_f32 *S, float32_t *pSrc, float32_t *pDst, uint32_t blockSize)
Processing function for the floating-point FIR lattice filter.