STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_q7.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_q7.c
9 *
10 * Description: Q7 FIR filter processing function.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
70  const arm_fir_instance_q7 * S,
71  q7_t * pSrc,
72  q7_t * pDst,
73  uint32_t blockSize)
74 {
75 
76 #ifndef ARM_MATH_CM0_FAMILY
77 
78  /* Run the below code for Cortex-M4 and Cortex-M3 */
79 
80  q7_t *pState = S->pState; /* State pointer */
81  q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
82  q7_t *pStateCurnt; /* Points to the current sample of the state */
83  q7_t x0, x1, x2, x3; /* Temporary variables to hold state */
84  q7_t c0; /* Temporary variable to hold coefficient value */
85  q7_t *px; /* Temporary pointer for state */
86  q7_t *pb; /* Temporary pointer for coefficient buffer */
87  q31_t acc0, acc1, acc2, acc3; /* Accumulators */
88  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
89  uint32_t i, tapCnt, blkCnt; /* Loop counters */
90 
91  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
92  /* pStateCurnt points to the location where the new input data should be written */
93  pStateCurnt = &(S->pState[(numTaps - 1u)]);
94 
95  /* Apply loop unrolling and compute 4 output values simultaneously.
96  * The variables acc0 ... acc3 hold output values that are being computed:
97  *
98  * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]
99  * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]
100  * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]
101  * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3]
102  */
103  blkCnt = blockSize >> 2;
104 
105  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
106  ** a second loop below computes the remaining 1 to 3 samples. */
107  while(blkCnt > 0u)
108  {
109  /* Copy four new input samples into the state buffer */
110  *pStateCurnt++ = *pSrc++;
111  *pStateCurnt++ = *pSrc++;
112  *pStateCurnt++ = *pSrc++;
113  *pStateCurnt++ = *pSrc++;
114 
115  /* Set all accumulators to zero */
116  acc0 = 0;
117  acc1 = 0;
118  acc2 = 0;
119  acc3 = 0;
120 
121  /* Initialize state pointer */
122  px = pState;
123 
124  /* Initialize coefficient pointer */
125  pb = pCoeffs;
126 
127  /* Read the first three samples from the state buffer:
128  * x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */
129  x0 = *(px++);
130  x1 = *(px++);
131  x2 = *(px++);
132 
133  /* Loop unrolling. Process 4 taps at a time. */
134  tapCnt = numTaps >> 2;
135  i = tapCnt;
136 
137  while(i > 0u)
138  {
139  /* Read the b[numTaps] coefficient */
140  c0 = *pb;
141 
142  /* Read x[n-numTaps-3] sample */
143  x3 = *px;
144 
145  /* acc0 += b[numTaps] * x[n-numTaps] */
146  acc0 += ((q15_t) x0 * c0);
147 
148  /* acc1 += b[numTaps] * x[n-numTaps-1] */
149  acc1 += ((q15_t) x1 * c0);
150 
151  /* acc2 += b[numTaps] * x[n-numTaps-2] */
152  acc2 += ((q15_t) x2 * c0);
153 
154  /* acc3 += b[numTaps] * x[n-numTaps-3] */
155  acc3 += ((q15_t) x3 * c0);
156 
157  /* Read the b[numTaps-1] coefficient */
158  c0 = *(pb + 1u);
159 
160  /* Read x[n-numTaps-4] sample */
161  x0 = *(px + 1u);
162 
163  /* Perform the multiply-accumulates */
164  acc0 += ((q15_t) x1 * c0);
165  acc1 += ((q15_t) x2 * c0);
166  acc2 += ((q15_t) x3 * c0);
167  acc3 += ((q15_t) x0 * c0);
168 
169  /* Read the b[numTaps-2] coefficient */
170  c0 = *(pb + 2u);
171 
172  /* Read x[n-numTaps-5] sample */
173  x1 = *(px + 2u);
174 
175  /* Perform the multiply-accumulates */
176  acc0 += ((q15_t) x2 * c0);
177  acc1 += ((q15_t) x3 * c0);
178  acc2 += ((q15_t) x0 * c0);
179  acc3 += ((q15_t) x1 * c0);
180 
181  /* Read the b[numTaps-3] coefficients */
182  c0 = *(pb + 3u);
183 
184  /* Read x[n-numTaps-6] sample */
185  x2 = *(px + 3u);
186 
187  /* Perform the multiply-accumulates */
188  acc0 += ((q15_t) x3 * c0);
189  acc1 += ((q15_t) x0 * c0);
190  acc2 += ((q15_t) x1 * c0);
191  acc3 += ((q15_t) x2 * c0);
192 
193  /* Advance the coefficient and state pointers past the 4 taps just processed */
194  pb += 4u;
195  px += 4u;
196 
197  /* Decrement the loop counter */
198  i--;
199  }
200 
201  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
202 
203  i = numTaps - (tapCnt * 4u);
204  while(i > 0u)
205  {
206  /* Read coefficients */
207  c0 = *(pb++);
208 
209  /* Fetch 1 state variable */
210  x3 = *(px++);
211 
212  /* Perform the multiply-accumulates */
213  acc0 += ((q15_t) x0 * c0);
214  acc1 += ((q15_t) x1 * c0);
215  acc2 += ((q15_t) x2 * c0);
216  acc3 += ((q15_t) x3 * c0);
217 
218  /* Reuse the present sample states for next sample */
219  x0 = x1;
220  x1 = x2;
221  x2 = x3;
222 
223  /* Decrement the loop counter */
224  i--;
225  }
226 
227  /* Advance the state pointer by 4 to process the next group of 4 samples */
228  pState = pState + 4;
229 
230  /* The results in the 4 accumulators are in 2.14 format. Convert to 1.7
231  ** Then store the 4 outputs in the destination buffer. */
232  acc0 = __SSAT((acc0 >> 7u), 8);
233  *pDst++ = acc0;
234  acc1 = __SSAT((acc1 >> 7u), 8);
235  *pDst++ = acc1;
236  acc2 = __SSAT((acc2 >> 7u), 8);
237  *pDst++ = acc2;
238  acc3 = __SSAT((acc3 >> 7u), 8);
239  *pDst++ = acc3;
240 
241  /* Decrement the samples loop counter */
242  blkCnt--;
243  }
244 
245 
246  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
247  ** No loop unrolling is used. */
248  blkCnt = blockSize % 4u;
249 
250  while(blkCnt > 0u)
251  {
252  /* Copy one sample at a time into state buffer */
253  *pStateCurnt++ = *pSrc++;
254 
255  /* Set the accumulator to zero */
256  acc0 = 0;
257 
258  /* Initialize state pointer */
259  px = pState;
260 
261  /* Initialize Coefficient pointer */
262  pb = (pCoeffs);
263 
264  i = numTaps;
265 
266  /* Perform the multiply-accumulates */
267  do
268  {
269  acc0 += (q15_t) * (px++) * (*(pb++));
270  i--;
271  } while(i > 0u);
272 
273  /* The result is in 2.14 format. Convert to 1.7
274  ** Then store the output in the destination buffer. */
275  *pDst++ = __SSAT((acc0 >> 7u), 8);
276 
277  /* Advance state pointer by 1 for the next sample */
278  pState = pState + 1;
279 
280  /* Decrement the samples loop counter */
281  blkCnt--;
282  }
283 
284  /* Processing is complete.
285  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
286  ** This prepares the state buffer for the next function call. */
287 
288  /* Points to the start of the state buffer */
289  pStateCurnt = S->pState;
290 
291  tapCnt = (numTaps - 1u) >> 2u;
292 
293  /* copy data */
294  while(tapCnt > 0u)
295  {
296  *pStateCurnt++ = *pState++;
297  *pStateCurnt++ = *pState++;
298  *pStateCurnt++ = *pState++;
299  *pStateCurnt++ = *pState++;
300 
301  /* Decrement the loop counter */
302  tapCnt--;
303  }
304 
305  /* Calculate remaining number of copies */
306  tapCnt = (numTaps - 1u) % 0x4u;
307 
308  /* Copy the remaining q7_t samples */
309  while(tapCnt > 0u)
310  {
311  *pStateCurnt++ = *pState++;
312 
313  /* Decrement the loop counter */
314  tapCnt--;
315  }
316 
317 #else
318 
319 /* Run the below code for Cortex-M0 */
320 
321  uint32_t numTaps = S->numTaps; /* Number of taps in the filter */
322  uint32_t i, blkCnt; /* Loop counters */
323  q7_t *pState = S->pState; /* State pointer */
324  q7_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
325  q7_t *px, *pb; /* Temporary pointers to state and coeff */
326  q31_t acc = 0; /* Accumulator */
327  q7_t *pStateCurnt; /* Points to the current sample of the state */
328 
329 
330  /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
331  /* pStateCurnt points to the location where the new input data should be written */
332  pStateCurnt = S->pState + (numTaps - 1u);
333 
334  /* Initialize blkCnt with blockSize */
335  blkCnt = blockSize;
336 
337  /* Filter all blockSize samples one at a time; this path does no loop unrolling */
338  while(blkCnt > 0u)
339  {
340  /* Copy one sample at a time into state buffer */
341  *pStateCurnt++ = *pSrc++;
342 
343  /* Set accumulator to zero */
344  acc = 0;
345 
346  /* Initialize state pointer of type q7 */
347  px = pState;
348 
349  /* Initialize coeff pointer of type q7 */
350  pb = pCoeffs;
351 
352 
353  i = numTaps;
354 
355  while(i > 0u)
356  {
357  /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */
358  acc += (q15_t) * px++ * *pb++;
359  i--;
360  }
361 
362  /* Store the 1.7 format filter output in destination buffer */
363  *pDst++ = (q7_t) __SSAT((acc >> 7), 8);
364 
365  /* Advance the state pointer by 1 to process the next sample */
366  pState = pState + 1;
367 
368  /* Decrement the loop counter */
369  blkCnt--;
370  }
371 
372  /* Processing is complete.
373  ** Now copy the last numTaps - 1 samples to the start of the state buffer.
374  ** This prepares the state buffer for the next function call. */
375 
376 
377  /* Points to the start of the state buffer */
378  pStateCurnt = S->pState;
379 
380 
381  /* Copy (numTaps - 1) values */
382  i = (numTaps - 1u);
383 
384  /* Copy q7_t data */
385  while(i > 0u)
386  {
387  *pStateCurnt++ = *pState++;
388  i--;
389  }
390 
391 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
392 
393 }
394 
int8_t q7_t
8-bit fractional data type in 1.7 format.
Definition: arm_math.h:387
Instance structure for the Q7 FIR filter.
Definition: arm_math.h:1029
void arm_fir_q7(const arm_fir_instance_q7 *S, q7_t *pSrc, q7_t *pDst, uint32_t blockSize)
Processing function for the Q7 FIR filter.
Definition: arm_fir_q7.c:69
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397