STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_fir_interpolate_q15.c
Go to the documentation of this file.
1 /*-----------------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_fir_interpolate_q15.c
9 *
10 * Description: Q15 FIR interpolation.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * ---------------------------------------------------------------------------*/
40 
41 #include "arm_math.h"
42 
70 #ifndef ARM_MATH_CM0_FAMILY
71 
72  /* Run the below code for Cortex-M4 and Cortex-M3 */
73 
76  q15_t * pSrc,
77  q15_t * pDst,
78  uint32_t blockSize)
79 {
80  q15_t *pState = S->pState; /* State pointer */
81  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
82  q15_t *pStateCurnt; /* Points to the current sample of the state */
83  q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
84  q63_t sum0; /* Accumulators */
85  q15_t x0, c0; /* Temporary variables to hold state and coefficient values */
86  uint32_t i, blkCnt, j, tapCnt; /* Loop counters */
87  uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
88  uint32_t blkCntN2;
89  q63_t acc0, acc1;
90  q15_t x1;
91 
92  /* S->pState buffer contains previous frame (phaseLen - 1) samples */
93  /* pStateCurnt points to the location where the new input data should be written */
94  pStateCurnt = S->pState + ((q31_t) phaseLen - 1);
95 
96  /* Initialise blkCnt */
97  blkCnt = blockSize / 2;
98  blkCntN2 = blockSize - (2 * blkCnt);
99 
100  /* Samples loop unrolled by 2 */
101  while(blkCnt > 0u)
102  {
103  /* Copy new input sample into the state buffer */
104  *pStateCurnt++ = *pSrc++;
105  *pStateCurnt++ = *pSrc++;
106 
107  /* Address modifier index of coefficient buffer */
108  j = 1u;
109 
110  /* Loop over the Interpolation factor. */
111  i = (S->L);
112 
113  while(i > 0u)
114  {
115  /* Set accumulator to zero */
116  acc0 = 0;
117  acc1 = 0;
118 
119  /* Initialize state pointer */
120  ptr1 = pState;
121 
122  /* Initialize coefficient pointer */
123  ptr2 = pCoeffs + (S->L - j);
124 
125  /* Loop over the polyPhase length. Unroll by a factor of 4.
126  ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
127  tapCnt = phaseLen >> 2u;
128 
129  x0 = *(ptr1++);
130 
131  while(tapCnt > 0u)
132  {
133 
134  /* Read the input sample */
135  x1 = *(ptr1++);
136 
137  /* Read the coefficient */
138  c0 = *(ptr2);
139 
140  /* Perform the multiply-accumulate */
141  acc0 += (q63_t) x0 *c0;
142  acc1 += (q63_t) x1 *c0;
143 
144 
145  /* Read the coefficient */
146  c0 = *(ptr2 + S->L);
147 
148  /* Read the input sample */
149  x0 = *(ptr1++);
150 
151  /* Perform the multiply-accumulate */
152  acc0 += (q63_t) x1 *c0;
153  acc1 += (q63_t) x0 *c0;
154 
155 
156  /* Read the coefficient */
157  c0 = *(ptr2 + S->L * 2);
158 
159  /* Read the input sample */
160  x1 = *(ptr1++);
161 
162  /* Perform the multiply-accumulate */
163  acc0 += (q63_t) x0 *c0;
164  acc1 += (q63_t) x1 *c0;
165 
166  /* Read the coefficient */
167  c0 = *(ptr2 + S->L * 3);
168 
169  /* Read the input sample */
170  x0 = *(ptr1++);
171 
172  /* Perform the multiply-accumulate */
173  acc0 += (q63_t) x1 *c0;
174  acc1 += (q63_t) x0 *c0;
175 
176 
177  /* Upsampling is done by stuffing L-1 zeros between each sample.
178  * So instead of multiplying zeros with coefficients,
179  * Increment the coefficient pointer by interpolation factor times. */
180  ptr2 += 4 * S->L;
181 
182  /* Decrement the loop counter */
183  tapCnt--;
184  }
185 
186  /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
187  tapCnt = phaseLen % 0x4u;
188 
189  while(tapCnt > 0u)
190  {
191 
192  /* Read the input sample */
193  x1 = *(ptr1++);
194 
195  /* Read the coefficient */
196  c0 = *(ptr2);
197 
198  /* Perform the multiply-accumulate */
199  acc0 += (q63_t) x0 *c0;
200  acc1 += (q63_t) x1 *c0;
201 
202  /* Increment the coefficient pointer by interpolation factor times. */
203  ptr2 += S->L;
204 
205  /* update states for next sample processing */
206  x0 = x1;
207 
208  /* Decrement the loop counter */
209  tapCnt--;
210  }
211 
212  /* The result is in the accumulator, store in the destination buffer. */
213  *pDst = (q15_t) (__SSAT((acc0 >> 15), 16));
214  *(pDst + S->L) = (q15_t) (__SSAT((acc1 >> 15), 16));
215 
216  pDst++;
217 
218  /* Increment the address modifier index of coefficient buffer */
219  j++;
220 
221  /* Decrement the loop counter */
222  i--;
223  }
224 
225  /* Advance the state pointer by 1
226  * to process the next group of interpolation factor number samples */
227  pState = pState + 2;
228 
229  pDst += S->L;
230 
231  /* Decrement the loop counter */
232  blkCnt--;
233  }
234 
235  /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
236  ** No loop unrolling is used. */
237  blkCnt = blkCntN2;
238 
239  /* Loop over the blockSize. */
240  while(blkCnt > 0u)
241  {
242  /* Copy new input sample into the state buffer */
243  *pStateCurnt++ = *pSrc++;
244 
245  /* Address modifier index of coefficient buffer */
246  j = 1u;
247 
248  /* Loop over the Interpolation factor. */
249  i = S->L;
250  while(i > 0u)
251  {
252  /* Set accumulator to zero */
253  sum0 = 0;
254 
255  /* Initialize state pointer */
256  ptr1 = pState;
257 
258  /* Initialize coefficient pointer */
259  ptr2 = pCoeffs + (S->L - j);
260 
261  /* Loop over the polyPhase length. Unroll by a factor of 4.
262  ** Repeat until we've computed numTaps-(4*S->L) coefficients. */
263  tapCnt = phaseLen >> 2;
264  while(tapCnt > 0u)
265  {
266 
267  /* Read the coefficient */
268  c0 = *(ptr2);
269 
270  /* Upsampling is done by stuffing L-1 zeros between each sample.
271  * So instead of multiplying zeros with coefficients,
272  * Increment the coefficient pointer by interpolation factor times. */
273  ptr2 += S->L;
274 
275  /* Read the input sample */
276  x0 = *(ptr1++);
277 
278  /* Perform the multiply-accumulate */
279  sum0 += (q63_t) x0 *c0;
280 
281  /* Read the coefficient */
282  c0 = *(ptr2);
283 
284  /* Increment the coefficient pointer by interpolation factor times. */
285  ptr2 += S->L;
286 
287  /* Read the input sample */
288  x0 = *(ptr1++);
289 
290  /* Perform the multiply-accumulate */
291  sum0 += (q63_t) x0 *c0;
292 
293  /* Read the coefficient */
294  c0 = *(ptr2);
295 
296  /* Increment the coefficient pointer by interpolation factor times. */
297  ptr2 += S->L;
298 
299  /* Read the input sample */
300  x0 = *(ptr1++);
301 
302  /* Perform the multiply-accumulate */
303  sum0 += (q63_t) x0 *c0;
304 
305  /* Read the coefficient */
306  c0 = *(ptr2);
307 
308  /* Increment the coefficient pointer by interpolation factor times. */
309  ptr2 += S->L;
310 
311  /* Read the input sample */
312  x0 = *(ptr1++);
313 
314  /* Perform the multiply-accumulate */
315  sum0 += (q63_t) x0 *c0;
316 
317  /* Decrement the loop counter */
318  tapCnt--;
319  }
320 
321  /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */
322  tapCnt = phaseLen & 0x3u;
323 
324  while(tapCnt > 0u)
325  {
326  /* Read the coefficient */
327  c0 = *(ptr2);
328 
329  /* Increment the coefficient pointer by interpolation factor times. */
330  ptr2 += S->L;
331 
332  /* Read the input sample */
333  x0 = *(ptr1++);
334 
335  /* Perform the multiply-accumulate */
336  sum0 += (q63_t) x0 *c0;
337 
338  /* Decrement the loop counter */
339  tapCnt--;
340  }
341 
342  /* The result is in the accumulator, store in the destination buffer. */
343  *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
344 
345  j++;
346 
347  /* Decrement the loop counter */
348  i--;
349  }
350 
351  /* Advance the state pointer by 1
352  * to process the next group of interpolation factor number samples */
353  pState = pState + 1;
354 
355  /* Decrement the loop counter */
356  blkCnt--;
357  }
358 
359 
360  /* Processing is complete.
361  ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer.
362  ** This prepares the state buffer for the next function call. */
363 
364  /* Points to the start of the state buffer */
365  pStateCurnt = S->pState;
366 
367  i = ((uint32_t) phaseLen - 1u) >> 2u;
368 
369  /* copy data */
370  while(i > 0u)
371  {
372 #ifndef UNALIGNED_SUPPORT_DISABLE
373 
374  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
375  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
376 
377 #else
378 
379  *pStateCurnt++ = *pState++;
380  *pStateCurnt++ = *pState++;
381  *pStateCurnt++ = *pState++;
382  *pStateCurnt++ = *pState++;
383 
384 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
385 
386  /* Decrement the loop counter */
387  i--;
388  }
389 
390  i = ((uint32_t) phaseLen - 1u) % 0x04u;
391 
392  while(i > 0u)
393  {
394  *pStateCurnt++ = *pState++;
395 
396  /* Decrement the loop counter */
397  i--;
398  }
399 }
400 
401 #else
402 
403  /* Run the below code for Cortex-M0 */
404 
407  q15_t * pSrc,
408  q15_t * pDst,
409  uint32_t blockSize)
410 {
411  q15_t *pState = S->pState; /* State pointer */
412  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
413  q15_t *pStateCurnt; /* Points to the current sample of the state */
414  q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */
415  q63_t sum; /* Accumulator */
416  q15_t x0, c0; /* Temporary variables to hold state and coefficient values */
417  uint32_t i, blkCnt, tapCnt; /* Loop counters */
418  uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */
419 
420 
421  /* S->pState buffer contains previous frame (phaseLen - 1) samples */
422  /* pStateCurnt points to the location where the new input data should be written */
423  pStateCurnt = S->pState + (phaseLen - 1u);
424 
425  /* Total number of intput samples */
426  blkCnt = blockSize;
427 
428  /* Loop over the blockSize. */
429  while(blkCnt > 0u)
430  {
431  /* Copy new input sample into the state buffer */
432  *pStateCurnt++ = *pSrc++;
433 
434  /* Loop over the Interpolation factor. */
435  i = S->L;
436 
437  while(i > 0u)
438  {
439  /* Set accumulator to zero */
440  sum = 0;
441 
442  /* Initialize state pointer */
443  ptr1 = pState;
444 
445  /* Initialize coefficient pointer */
446  ptr2 = pCoeffs + (i - 1u);
447 
448  /* Loop over the polyPhase length */
449  tapCnt = (uint32_t) phaseLen;
450 
451  while(tapCnt > 0u)
452  {
453  /* Read the coefficient */
454  c0 = *ptr2;
455 
456  /* Increment the coefficient pointer by interpolation factor times. */
457  ptr2 += S->L;
458 
459  /* Read the input sample */
460  x0 = *ptr1++;
461 
462  /* Perform the multiply-accumulate */
463  sum += ((q31_t) x0 * c0);
464 
465  /* Decrement the loop counter */
466  tapCnt--;
467  }
468 
469  /* Store the result after converting to 1.15 format in the destination buffer */
470  *pDst++ = (q15_t) (__SSAT((sum >> 15), 16));
471 
472  /* Decrement the loop counter */
473  i--;
474  }
475 
476  /* Advance the state pointer by 1
477  * to process the next group of interpolation factor number samples */
478  pState = pState + 1;
479 
480  /* Decrement the loop counter */
481  blkCnt--;
482  }
483 
484  /* Processing is complete.
485  ** Now copy the last phaseLen - 1 samples to the start of the state buffer.
486  ** This prepares the state buffer for the next function call. */
487 
488  /* Points to the start of the state buffer */
489  pStateCurnt = S->pState;
490 
491  i = (uint32_t) phaseLen - 1u;
492 
493  while(i > 0u)
494  {
495  *pStateCurnt++ = *pState++;
496 
497  /* Decrement the loop counter */
498  i--;
499  }
500 
501 }
502 
503 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
504 
505 
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
void arm_fir_interpolate_q15(const arm_fir_interpolate_instance_q15 *S, q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Processing function for the Q15 FIR interpolator.
Instance structure for the Q15 FIR interpolator.
Definition: arm_math.h:3432
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397