STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_lms_q15.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_lms_q15.c
9 *
10 * Description: Processing function for the Q15 LMS filter.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
75  const arm_lms_instance_q15 * S,
76  q15_t * pSrc,
77  q15_t * pRef,
78  q15_t * pOut,
79  q15_t * pErr,
80  uint32_t blockSize)
81 {
82  q15_t *pState = S->pState; /* State pointer */
83  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
84  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
85  q15_t *pStateCurnt; /* Points to the current sample of the state */
86  q15_t mu = S->mu; /* Adaptive factor */
87  q15_t *px; /* Temporary pointer for state */
88  q15_t *pb; /* Temporary pointer for coefficient buffer */
89  uint32_t tapCnt, blkCnt; /* Loop counters */
90  q63_t acc; /* Accumulator */
91  q15_t e = 0; /* error of data sample */
92  q15_t alpha; /* Intermediate constant for taps update */
93  q31_t coef; /* Temporary variable for coefficient */
94  q31_t acc_l, acc_h;
95  int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
96  int32_t uShift = (32 - lShift);
97 
98 
99 #ifndef ARM_MATH_CM0_FAMILY
100 
101  /* Run the below code for Cortex-M4 and Cortex-M3 */
102 
103 
104  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
105  /* pStateCurnt points to the location where the new input data should be written */
106  pStateCurnt = &(S->pState[(numTaps - 1u)]);
107 
108  /* Initializing blkCnt with blockSize */
109  blkCnt = blockSize;
110 
111  while(blkCnt > 0u)
112  {
113  /* Copy the new input sample into the state buffer */
114  *pStateCurnt++ = *pSrc++;
115 
116  /* Initialize state pointer */
117  px = pState;
118 
119  /* Initialize coefficient pointer */
120  pb = pCoeffs;
121 
122  /* Set the accumulator to zero */
123  acc = 0;
124 
125  /* Loop unrolling. Process 4 taps at a time. */
126  tapCnt = numTaps >> 2u;
127 
128  while(tapCnt > 0u)
129  {
130  /* acc += b[N] * x[n-N] + b[N-1] * x[n-N-1] */
131  /* Perform the multiply-accumulate */
132 #ifndef UNALIGNED_SUPPORT_DISABLE
133 
134  acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
135  acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
136 
137 #else
138 
139  acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
140  acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
141  acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
142  acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
143 
144 
145 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
146 
147  /* Decrement the loop counter */
148  tapCnt--;
149  }
150 
151  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
152  tapCnt = numTaps % 0x4u;
153 
154  while(tapCnt > 0u)
155  {
156  /* Perform the multiply-accumulate */
157  acc += (q63_t) (((q31_t) (*px++) * (*pb++)));
158 
159  /* Decrement the loop counter */
160  tapCnt--;
161  }
162 
163  /* Calc lower part of acc */
164  acc_l = acc & 0xffffffff;
165 
166  /* Calc upper part of acc */
167  acc_h = (acc >> 32) & 0xffffffff;
168 
169  /* Apply shift for lower part of acc and upper part of acc */
170  acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
171 
172  /* Converting the result to 1.15 format and saturate the output */
173  acc = __SSAT(acc, 16);
174 
175  /* Store the result from accumulator into the destination buffer. */
176  *pOut++ = (q15_t) acc;
177 
178  /* Compute and store error */
179  e = *pRef++ - (q15_t) acc;
180 
181  *pErr++ = (q15_t) e;
182 
183  /* Compute alpha i.e. intermediate constant for taps update */
184  alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
185 
186  /* Initialize state pointer */
187  /* Advance state pointer by 1 for the next sample */
188  px = pState++;
189 
190  /* Initialize coefficient pointer */
191  pb = pCoeffs;
192 
193  /* Loop unrolling. Process 4 taps at a time. */
194  tapCnt = numTaps >> 2u;
195 
196  /* Update filter coefficients */
197  while(tapCnt > 0u)
198  {
199  coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
200  *pb++ = (q15_t) __SSAT((coef), 16);
201  coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
202  *pb++ = (q15_t) __SSAT((coef), 16);
203  coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
204  *pb++ = (q15_t) __SSAT((coef), 16);
205  coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
206  *pb++ = (q15_t) __SSAT((coef), 16);
207 
208  /* Decrement the loop counter */
209  tapCnt--;
210  }
211 
212  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
213  tapCnt = numTaps % 0x4u;
214 
215  while(tapCnt > 0u)
216  {
217  /* Update the remaining filter coefficient and saturate it to 16 bits */
218  coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
219  *pb++ = (q15_t) __SSAT((coef), 16);
220 
221  /* Decrement the loop counter */
222  tapCnt--;
223  }
224 
225  /* Decrement the loop counter */
226  blkCnt--;
227 
228  }
229 
230  /* Processing is complete. Now copy the last numTaps - 1 samples to the
231  start of the state buffer. This prepares the state buffer for the
232  next function call. */
233 
234  /* Points to the start of the pState buffer */
235  pStateCurnt = S->pState;
236 
237  /* Number of 4-sample groups to copy; leftover samples are copied one at a time below */
238  tapCnt = (numTaps - 1u) >> 2;
239 
240  while(tapCnt > 0u)
241  {
242 
243 #ifndef UNALIGNED_SUPPORT_DISABLE
244 
245  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
246  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
247 #else
248  *pStateCurnt++ = *pState++;
249  *pStateCurnt++ = *pState++;
250  *pStateCurnt++ = *pState++;
251  *pStateCurnt++ = *pState++;
252 #endif
253 
254  tapCnt--;
255 
256  }
257 
258  /* Calculation of count for remaining q15_t data */
259  tapCnt = (numTaps - 1u) % 0x4u;
260 
261  /* copy data */
262  while(tapCnt > 0u)
263  {
264  *pStateCurnt++ = *pState++;
265 
266  /* Decrement the loop counter */
267  tapCnt--;
268  }
269 
270 #else
271 
272  /* Run the below code for Cortex-M0 */
273 
274  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
275  /* pStateCurnt points to the location where the new input data should be written */
276  pStateCurnt = &(S->pState[(numTaps - 1u)]);
277 
278  /* Loop over blockSize number of values */
279  blkCnt = blockSize;
280 
281  while(blkCnt > 0u)
282  {
283  /* Copy the new input sample into the state buffer */
284  *pStateCurnt++ = *pSrc++;
285 
286  /* Initialize pState pointer */
287  px = pState;
288 
289  /* Initialize pCoeffs pointer */
290  pb = pCoeffs;
291 
292  /* Set the accumulator to zero */
293  acc = 0;
294 
295  /* Loop over numTaps number of values */
296  tapCnt = numTaps;
297 
298  while(tapCnt > 0u)
299  {
300  /* Perform the multiply-accumulate */
301  acc += (q63_t) ((q31_t) (*px++) * (*pb++));
302 
303  /* Decrement the loop counter */
304  tapCnt--;
305  }
306 
307  /* Calc lower part of acc */
308  acc_l = acc & 0xffffffff;
309 
310  /* Calc upper part of acc */
311  acc_h = (acc >> 32) & 0xffffffff;
312 
313  /* Apply shift for lower part of acc and upper part of acc */
314  acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
315 
316  /* Converting the result to 1.15 format and saturate the output */
317  acc = __SSAT(acc, 16);
318 
319  /* Store the result from accumulator into the destination buffer. */
320  *pOut++ = (q15_t) acc;
321 
322  /* Compute and store error */
323  e = *pRef++ - (q15_t) acc;
324 
325  *pErr++ = (q15_t) e;
326 
327  /* Compute alpha i.e. intermediate constant for taps update */
328  alpha = (q15_t) (((q31_t) e * (mu)) >> 15);
329 
330  /* Initialize pState pointer */
331  /* Advance state pointer by 1 for the next sample */
332  px = pState++;
333 
334  /* Initialize pCoeffs pointer */
335  pb = pCoeffs;
336 
337  /* Loop over numTaps number of values */
338  tapCnt = numTaps;
339 
340  while(tapCnt > 0u)
341  {
342  /* Update the filter coefficient and saturate it to 16 bits */
343  coef = (q31_t) * pb + (((q31_t) alpha * (*px++)) >> 15);
344  *pb++ = (q15_t) __SSAT((coef), 16);
345 
346  /* Decrement the loop counter */
347  tapCnt--;
348  }
349 
350  /* Decrement the loop counter */
351  blkCnt--;
352 
353  }
354 
355  /* Processing is complete. Now copy the last numTaps - 1 samples to the
356  start of the state buffer. This prepares the state buffer for the
357  next function call. */
358 
359  /* Points to the start of the pState buffer */
360  pStateCurnt = S->pState;
361 
362  /* Copy (numTaps - 1u) samples */
363  tapCnt = (numTaps - 1u);
364 
365  /* Copy the data */
366  while(tapCnt > 0u)
367  {
368  *pStateCurnt++ = *pState++;
369 
370  /* Decrement the loop counter */
371  tapCnt--;
372  }
373 
374 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
375 
376 }
377 
void arm_lms_q15(const arm_lms_instance_q15 *S, q15_t *pSrc, q15_t *pRef, q15_t *pOut, q15_t *pErr, uint32_t blockSize)
Processing function for Q15 LMS filter.
Definition: arm_lms_q15.c:74
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
Instance structure for the Q15 LMS filter.
Definition: arm_math.h:4016
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397