STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_decimate_fast_q31.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_decimate_fast_q31.c
9 *
10 * Description: Fast Q31 FIR Decimator.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
78  q31_t * pSrc,
79  q31_t * pDst,
80  uint32_t blockSize)
81 {
82  q31_t *pState = S->pState; /* State pointer */
83  q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
84  q31_t *pStateCurnt; /* Points to the current sample of the state */
85  q31_t x0, c0; /* Temporary variables to hold state and coefficient values */
86  q31_t *px; /* Temporary pointers for state buffer */
87  q31_t *pb; /* Temporary pointers for coefficient buffer */
88  q31_t sum0; /* Accumulator */
89  uint32_t numTaps = S->numTaps; /* Number of taps */
90  uint32_t i, tapCnt, blkCnt, outBlockSize = blockSize / S->M; /* Loop counters */
91  uint32_t blkCntN2;
92  q31_t x1;
93  q31_t acc0, acc1;
94  q31_t *px0, *px1;
95 
96  /* S->pState buffer contains previous frame (numTaps - 1) samples */
97  /* pStateCurnt points to the location where the new input data should be written */
98  pStateCurnt = S->pState + (numTaps - 1u);
99 
100  /* Total number of output samples to be computed */
101 
102  blkCnt = outBlockSize / 2;
103  blkCntN2 = outBlockSize - (2 * blkCnt);
104 
105  while(blkCnt > 0u)
106  {
107 /* Copy 2 * M (twice the decimation factor) new input samples into the state buffer */
108  i = 2 * S->M;
109 
110  do
111  {
112  *pStateCurnt++ = *pSrc++;
113 
114  } while(--i);
115 
116  /* Set accumulator to zero */
117  acc0 = 0;
118  acc1 = 0;
119 
120  /* Initialize state pointer */
121  px0 = pState;
122  px1 = pState + S->M;
123 
124  /* Initialize coeff pointer */
125  pb = pCoeffs;
126 
127  /* Loop unrolling. Process 4 taps at a time. */
128  tapCnt = numTaps >> 2;
129 
130  /* Loop over the number of taps. Unroll by a factor of 4.
131  ** Repeat until we've computed numTaps-4 coefficients. */
132  while(tapCnt > 0u)
133  {
134  /* Read the b[numTaps-1] coefficient */
135  c0 = *(pb);
136 
137 /* Read x[n-numTaps-1] for sample 0 and sample 1 */
138  x0 = *(px0);
139  x1 = *(px1);
140 
141  /* Perform the multiply-accumulate */
142  acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32);
143  acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32);
144 
145  /* Read the b[numTaps-2] coefficient */
146  c0 = *(pb + 1u);
147 
148 /* Read x[n-numTaps-2] for sample 0 and sample 1 */
149  x0 = *(px0 + 1u);
150  x1 = *(px1 + 1u);
151 
152  /* Perform the multiply-accumulate */
153  acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32);
154  acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32);
155 
156  /* Read the b[numTaps-3] coefficient */
157  c0 = *(pb + 2u);
158 
159 /* Read x[n-numTaps-3] for sample 0 and sample 1 */
160  x0 = *(px0 + 2u);
161  x1 = *(px1 + 2u);
162  pb += 4u;
163 
164  /* Perform the multiply-accumulate */
165  acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32);
166  acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32);
167 
168  /* Read the b[numTaps-4] coefficient */
169  c0 = *(pb - 1u);
170 
171 /* Read x[n-numTaps-4] for sample 0 and sample 1 */
172  x0 = *(px0 + 3u);
173  x1 = *(px1 + 3u);
174 
175 
176  /* Perform the multiply-accumulate */
177  acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32);
178  acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32);
179 
180  /* update state pointers */
181  px0 += 4u;
182  px1 += 4u;
183 
184  /* Decrement the loop counter */
185  tapCnt--;
186  }
187 
188  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
189  tapCnt = numTaps % 0x4u;
190 
191  while(tapCnt > 0u)
192  {
193  /* Read coefficients */
194  c0 = *(pb++);
195 
196  /* Fetch 1 state variable */
197  x0 = *(px0++);
198  x1 = *(px1++);
199 
200  /* Perform the multiply-accumulate */
201  acc0 = (q31_t) ((((q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32);
202  acc1 = (q31_t) ((((q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32);
203 
204  /* Decrement the loop counter */
205  tapCnt--;
206  }
207 
208 /* Advance the state pointer by twice the decimation factor (2 * M)
209 * to process the next group of 2 * M input samples */
210  pState = pState + S->M * 2;
211 
212  /* The result is in the accumulator, store in the destination buffer. */
213  *pDst++ = (q31_t) (acc0 << 1);
214  *pDst++ = (q31_t) (acc1 << 1);
215 
216  /* Decrement the loop counter */
217  blkCnt--;
218  }
219 
220  while(blkCntN2 > 0u)
221  {
222  /* Copy decimation factor number of new input samples into the state buffer */
223  i = S->M;
224 
225  do
226  {
227  *pStateCurnt++ = *pSrc++;
228 
229  } while(--i);
230 
231  /* Set accumulator to zero */
232  sum0 = 0;
233 
234  /* Initialize state pointer */
235  px = pState;
236 
237  /* Initialize coeff pointer */
238  pb = pCoeffs;
239 
240  /* Loop unrolling. Process 4 taps at a time. */
241  tapCnt = numTaps >> 2;
242 
243  /* Loop over the number of taps. Unroll by a factor of 4.
244  ** Repeat until we've computed numTaps-4 coefficients. */
245  while(tapCnt > 0u)
246  {
247  /* Read the b[numTaps-1] coefficient */
248  c0 = *(pb++);
249 
250  /* Read x[n-numTaps-1] sample */
251  x0 = *(px++);
252 
253  /* Perform the multiply-accumulate */
254  sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32);
255 
256  /* Read the b[numTaps-2] coefficient */
257  c0 = *(pb++);
258 
259  /* Read x[n-numTaps-2] sample */
260  x0 = *(px++);
261 
262  /* Perform the multiply-accumulate */
263  sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32);
264 
265  /* Read the b[numTaps-3] coefficient */
266  c0 = *(pb++);
267 
268  /* Read x[n-numTaps-3] sample */
269  x0 = *(px++);
270 
271  /* Perform the multiply-accumulate */
272  sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32);
273 
274  /* Read the b[numTaps-4] coefficient */
275  c0 = *(pb++);
276 
277  /* Read x[n-numTaps-4] sample */
278  x0 = *(px++);
279 
280  /* Perform the multiply-accumulate */
281  sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32);
282 
283  /* Decrement the loop counter */
284  tapCnt--;
285  }
286 
287  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
288  tapCnt = numTaps % 0x4u;
289 
290  while(tapCnt > 0u)
291  {
292  /* Read coefficients */
293  c0 = *(pb++);
294 
295  /* Fetch 1 state variable */
296  x0 = *(px++);
297 
298  /* Perform the multiply-accumulate */
299  sum0 = (q31_t) ((((q63_t) sum0 << 32) + ((q63_t) x0 * c0)) >> 32);
300 
301  /* Decrement the loop counter */
302  tapCnt--;
303  }
304 
305  /* Advance the state pointer by the decimation factor
306  * to process the next group of decimation factor number samples */
307  pState = pState + S->M;
308 
309  /* The result is in the accumulator, store in the destination buffer. */
310  *pDst++ = (q31_t) (sum0 << 1);
311 
312  /* Decrement the loop counter */
313  blkCntN2--;
314  }
315 
316  /* Processing is complete.
317 ** Now copy the last numTaps - 1 samples to the start of the state buffer.
318  ** This prepares the state buffer for the next function call. */
319 
320  /* Points to the start of the state buffer */
321  pStateCurnt = S->pState;
322 
323  i = (numTaps - 1u) >> 2u;
324 
325  /* copy data */
326  while(i > 0u)
327  {
328  *pStateCurnt++ = *pState++;
329  *pStateCurnt++ = *pState++;
330  *pStateCurnt++ = *pState++;
331  *pStateCurnt++ = *pState++;
332 
333  /* Decrement the loop counter */
334  i--;
335  }
336 
337  i = (numTaps - 1u) % 0x04u;
338 
339  /* copy data */
340  while(i > 0u)
341  {
342  *pStateCurnt++ = *pState++;
343 
344  /* Decrement the loop counter */
345  i--;
346  }
347 }
348 
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397
Instance structure for the Q31 FIR decimator.
Definition: arm_math.h:3280
void arm_fir_decimate_fast_q31(arm_fir_decimate_instance_q31 *S, q31_t *pSrc, q31_t *pDst, uint32_t blockSize)
Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.