STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_sparse_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_sparse_f32.c
9 *
10 * Description: Floating-point sparse FIR filter processing function.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * ------------------------------------------------------------------- */
40 #include "arm_math.h"
41 
125  float32_t * pSrc,
126  float32_t * pDst,
127  float32_t * pScratchIn,
128  uint32_t blockSize)
129 {
130 
131  float32_t *pState = S->pState; /* State pointer */
132  float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
133  float32_t *px; /* Scratch buffer pointer */
134  float32_t *py = pState; /* Temporary pointers for state buffer */
135  float32_t *pb = pScratchIn; /* Temporary pointers for scratch buffer */
136  float32_t *pOut; /* Destination pointer */
137  int32_t *pTapDelay = S->pTapDelay; /* Pointer to the array containing offset of the non-zero tap values. */
138  uint32_t delaySize = S->maxDelay + blockSize; /* state length */
139  uint16_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
140  int32_t readIndex; /* Read index of the state buffer */
141  uint32_t tapCnt, blkCnt; /* loop counters */
142  float32_t coeff = *pCoeffs++; /* Read the first coefficient value */
143 
144 
145 
146  /* BlockSize of Input samples are copied into the state buffer */
147  /* StateIndex points to the starting position to write in the state buffer */
148  arm_circularWrite_f32((int32_t *) py, delaySize, &S->stateIndex, 1,
149  (int32_t *) pSrc, 1, blockSize);
150 
151 
152  /* Read Index, from where the state buffer should be read, is calculated. */
153  readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
154 
155  /* Wraparound of readIndex */
156  if(readIndex < 0)
157  {
158  readIndex += (int32_t) delaySize;
159  }
160 
161  /* Working pointer for state buffer is updated */
162  py = pState;
163 
164  /* blockSize samples are read from the state buffer */
165  arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
166  (int32_t *) pb, (int32_t *) pb, blockSize, 1,
167  blockSize);
168 
169  /* Working pointer for the scratch buffer */
170  px = pb;
171 
172  /* Working pointer for destination buffer */
173  pOut = pDst;
174 
175 
176 #ifndef ARM_MATH_CM0_FAMILY
177 
178  /* Run the below code for Cortex-M4 and Cortex-M3 */
179 
180  /* Loop over the blockSize. Unroll by a factor of 4.
181  * Compute 4 Multiplications at a time. */
182  blkCnt = blockSize >> 2u;
183 
184  while(blkCnt > 0u)
185  {
186  /* Perform Multiplications and store in destination buffer */
187  *pOut++ = *px++ * coeff;
188  *pOut++ = *px++ * coeff;
189  *pOut++ = *px++ * coeff;
190  *pOut++ = *px++ * coeff;
191 
192  /* Decrement the loop counter */
193  blkCnt--;
194  }
195 
196  /* If the blockSize is not a multiple of 4,
197  * compute the remaining samples */
198  blkCnt = blockSize % 0x4u;
199 
200  while(blkCnt > 0u)
201  {
202  /* Perform Multiplications and store in destination buffer */
203  *pOut++ = *px++ * coeff;
204 
205  /* Decrement the loop counter */
206  blkCnt--;
207  }
208 
209  /* Load the coefficient value and
210  * increment the coefficient buffer for the next set of state values */
211  coeff = *pCoeffs++;
212 
213  /* Read Index, from where the state buffer should be read, is calculated. */
214  readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
215 
216  /* Wraparound of readIndex */
217  if(readIndex < 0)
218  {
219  readIndex += (int32_t) delaySize;
220  }
221 
222  /* Loop over the number of taps. */
223  tapCnt = (uint32_t) numTaps - 2u;
224 
225  while(tapCnt > 0u)
226  {
227 
228  /* Working pointer for state buffer is updated */
229  py = pState;
230 
231  /* blockSize samples are read from the state buffer */
232  arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
233  (int32_t *) pb, (int32_t *) pb, blockSize, 1,
234  blockSize);
235 
236  /* Working pointer for the scratch buffer */
237  px = pb;
238 
239  /* Working pointer for destination buffer */
240  pOut = pDst;
241 
242  /* Loop over the blockSize. Unroll by a factor of 4.
243  * Compute 4 MACS at a time. */
244  blkCnt = blockSize >> 2u;
245 
246  while(blkCnt > 0u)
247  {
248  /* Perform Multiply-Accumulate */
249  *pOut++ += *px++ * coeff;
250  *pOut++ += *px++ * coeff;
251  *pOut++ += *px++ * coeff;
252  *pOut++ += *px++ * coeff;
253 
254  /* Decrement the loop counter */
255  blkCnt--;
256  }
257 
258  /* If the blockSize is not a multiple of 4,
259  * compute the remaining samples */
260  blkCnt = blockSize % 0x4u;
261 
262  while(blkCnt > 0u)
263  {
264  /* Perform Multiply-Accumulate */
265  *pOut++ += *px++ * coeff;
266 
267  /* Decrement the loop counter */
268  blkCnt--;
269  }
270 
271  /* Load the coefficient value and
272  * increment the coefficient buffer for the next set of state values */
273  coeff = *pCoeffs++;
274 
275  /* Read Index, from where the state buffer should be read, is calculated. */
276  readIndex = ((int32_t) S->stateIndex -
277  (int32_t) blockSize) - *pTapDelay++;
278 
279  /* Wraparound of readIndex */
280  if(readIndex < 0)
281  {
282  readIndex += (int32_t) delaySize;
283  }
284 
285  /* Decrement the tap loop counter */
286  tapCnt--;
287  }
288 
289  /* Compute last tap without the final read of pTapDelay */
290 
291  /* Working pointer for state buffer is updated */
292  py = pState;
293 
294  /* blockSize samples are read from the state buffer */
295  arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
296  (int32_t *) pb, (int32_t *) pb, blockSize, 1,
297  blockSize);
298 
299  /* Working pointer for the scratch buffer */
300  px = pb;
301 
302  /* Working pointer for destination buffer */
303  pOut = pDst;
304 
305  /* Loop over the blockSize. Unroll by a factor of 4.
306  * Compute 4 MACS at a time. */
307  blkCnt = blockSize >> 2u;
308 
309  while(blkCnt > 0u)
310  {
311  /* Perform Multiply-Accumulate */
312  *pOut++ += *px++ * coeff;
313  *pOut++ += *px++ * coeff;
314  *pOut++ += *px++ * coeff;
315  *pOut++ += *px++ * coeff;
316 
317  /* Decrement the loop counter */
318  blkCnt--;
319  }
320 
321  /* If the blockSize is not a multiple of 4,
322  * compute the remaining samples */
323  blkCnt = blockSize % 0x4u;
324 
325  while(blkCnt > 0u)
326  {
327  /* Perform Multiply-Accumulate */
328  *pOut++ += *px++ * coeff;
329 
330  /* Decrement the loop counter */
331  blkCnt--;
332  }
333 
334 #else
335 
336 /* Run the below code for Cortex-M0 */
337 
338  blkCnt = blockSize;
339 
340  while(blkCnt > 0u)
341  {
342  /* Perform Multiplications and store in destination buffer */
343  *pOut++ = *px++ * coeff;
344 
345  /* Decrement the loop counter */
346  blkCnt--;
347  }
348 
349  /* Load the coefficient value and
350  * increment the coefficient buffer for the next set of state values */
351  coeff = *pCoeffs++;
352 
353  /* Read Index, from where the state buffer should be read, is calculated. */
354  readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
355 
356  /* Wraparound of readIndex */
357  if(readIndex < 0)
358  {
359  readIndex += (int32_t) delaySize;
360  }
361 
362  /* Loop over the number of taps. */
363  tapCnt = (uint32_t) numTaps - 2u;
364 
365  while(tapCnt > 0u)
366  {
367 
368  /* Working pointer for state buffer is updated */
369  py = pState;
370 
371  /* blockSize samples are read from the state buffer */
372  arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
373  (int32_t *) pb, (int32_t *) pb, blockSize, 1,
374  blockSize);
375 
376  /* Working pointer for the scratch buffer */
377  px = pb;
378 
379  /* Working pointer for destination buffer */
380  pOut = pDst;
381 
382  blkCnt = blockSize;
383 
384  while(blkCnt > 0u)
385  {
386  /* Perform Multiply-Accumulate */
387  *pOut++ += *px++ * coeff;
388 
389  /* Decrement the loop counter */
390  blkCnt--;
391  }
392 
393  /* Load the coefficient value and
394  * increment the coefficient buffer for the next set of state values */
395  coeff = *pCoeffs++;
396 
397  /* Read Index, from where the state buffer should be read, is calculated. */
398  readIndex =
399  ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
400 
401  /* Wraparound of readIndex */
402  if(readIndex < 0)
403  {
404  readIndex += (int32_t) delaySize;
405  }
406 
407  /* Decrement the tap loop counter */
408  tapCnt--;
409  }
410 
411  /* Compute last tap without the final read of pTapDelay */
412 
413  /* Working pointer for state buffer is updated */
414  py = pState;
415 
416  /* blockSize samples are read from the state buffer */
417  arm_circularRead_f32((int32_t *) py, delaySize, &readIndex, 1,
418  (int32_t *) pb, (int32_t *) pb, blockSize, 1,
419  blockSize);
420 
421  /* Working pointer for the scratch buffer */
422  px = pb;
423 
424  /* Working pointer for destination buffer */
425  pOut = pDst;
426 
427  blkCnt = blockSize;
428 
429  while(blkCnt > 0u)
430  {
431  /* Perform Multiply-Accumulate */
432  *pOut++ += *px++ * coeff;
433 
434  /* Decrement the loop counter */
435  blkCnt--;
436  }
437 
438 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
439 
440 }
441 
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
void arm_fir_sparse_f32(arm_fir_sparse_instance_f32 *S, float32_t *pSrc, float32_t *pDst, float32_t *pScratchIn, uint32_t blockSize)
Processing function for the floating-point sparse FIR filter.
Instance structure for the floating-point sparse FIR filter.
Definition: arm_math.h:4430