STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_lms_norm_q15.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_lms_norm_q15.c
9 *
10 * Description: Q15 NLMS filter.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
81  q15_t * pSrc,
82  q15_t * pRef,
83  q15_t * pOut,
84  q15_t * pErr,
85  uint32_t blockSize)
86 {
87  q15_t *pState = S->pState; /* State pointer */
88  q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
89  q15_t *pStateCurnt; /* Points to the current sample of the state */
90  q15_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
91  q15_t mu = S->mu; /* Adaptive factor */
92  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
93  uint32_t tapCnt, blkCnt; /* Loop counters */
94  q31_t energy; /* Energy of the input */
95  q63_t acc; /* Accumulator */
96  q15_t e = 0, d = 0; /* error, reference data sample */
97  q15_t w = 0, in; /* weight factor and state */
98  q15_t x0; /* temporary variable to hold input sample */
99  //uint32_t shift = (uint32_t) S->postShift + 1u; /* Shift to be applied to the output */
100  q15_t errorXmu, oneByEnergy; /* Temporary variables to store error and mu product and reciprocal of energy */
101  q15_t postShift; /* Post shift to be applied to weight after reciprocal calculation */
102  q31_t coef; /* Teporary variable for coefficient */
103  q31_t acc_l, acc_h;
104  int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
105  int32_t uShift = (32 - lShift);
106 
107  energy = S->energy;
108  x0 = S->x0;
109 
110  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
111  /* pStateCurnt points to the location where the new input data should be written */
112  pStateCurnt = &(S->pState[(numTaps - 1u)]);
113 
114  /* Loop over blockSize number of values */
115  blkCnt = blockSize;
116 
117 
118 #ifndef ARM_MATH_CM0_FAMILY
119 
120  /* Run the below code for Cortex-M4 and Cortex-M3 */
121 
122  while(blkCnt > 0u)
123  {
124  /* Copy the new input sample into the state buffer */
125  *pStateCurnt++ = *pSrc;
126 
127  /* Initialize pState pointer */
128  px = pState;
129 
130  /* Initialize coeff pointer */
131  pb = (pCoeffs);
132 
133  /* Read the sample from input buffer */
134  in = *pSrc++;
135 
136  /* Update the energy calculation */
137  energy -= (((q31_t) x0 * (x0)) >> 15);
138  energy += (((q31_t) in * (in)) >> 15);
139 
140  /* Set the accumulator to zero */
141  acc = 0;
142 
143  /* Loop unrolling. Process 4 taps at a time. */
144  tapCnt = numTaps >> 2;
145 
146  while(tapCnt > 0u)
147  {
148 
149  /* Perform the multiply-accumulate */
150 #ifndef UNALIGNED_SUPPORT_DISABLE
151 
152  acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
153  acc = __SMLALD(*__SIMD32(px)++, (*__SIMD32(pb)++), acc);
154 
155 #else
156 
157  acc += (((q31_t) * px++ * (*pb++)));
158  acc += (((q31_t) * px++ * (*pb++)));
159  acc += (((q31_t) * px++ * (*pb++)));
160  acc += (((q31_t) * px++ * (*pb++)));
161 
162 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
163 
164  /* Decrement the loop counter */
165  tapCnt--;
166  }
167 
168  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
169  tapCnt = numTaps % 0x4u;
170 
171  while(tapCnt > 0u)
172  {
173  /* Perform the multiply-accumulate */
174  acc += (((q31_t) * px++ * (*pb++)));
175 
176  /* Decrement the loop counter */
177  tapCnt--;
178  }
179 
180  /* Calc lower part of acc */
181  acc_l = acc & 0xffffffff;
182 
183  /* Calc upper part of acc */
184  acc_h = (acc >> 32) & 0xffffffff;
185 
186  /* Apply shift for lower part of acc and upper part of acc */
187  acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
188 
189  /* Converting the result to 1.15 format and saturate the output */
190  acc = __SSAT(acc, 16u);
191 
192  /* Store the result from accumulator into the destination buffer. */
193  *pOut++ = (q15_t) acc;
194 
195  /* Compute and store error */
196  d = *pRef++;
197  e = d - (q15_t) acc;
198  *pErr++ = e;
199 
200  /* Calculation of 1/energy */
201  postShift = arm_recip_q15((q15_t) energy + DELTA_Q15,
202  &oneByEnergy, S->recipTable);
203 
204  /* Calculation of e * mu value */
205  errorXmu = (q15_t) (((q31_t) e * mu) >> 15);
206 
207  /* Calculation of (e * mu) * (1/energy) value */
208  acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift));
209 
210  /* Weighting factor for the normalized version */
211  w = (q15_t) __SSAT((q31_t) acc, 16);
212 
213  /* Initialize pState pointer */
214  px = pState;
215 
216  /* Initialize coeff pointer */
217  pb = (pCoeffs);
218 
219  /* Loop unrolling. Process 4 taps at a time. */
220  tapCnt = numTaps >> 2;
221 
222  /* Update filter coefficients */
223  while(tapCnt > 0u)
224  {
225  coef = *pb + (((q31_t) w * (*px++)) >> 15);
226  *pb++ = (q15_t) __SSAT((coef), 16);
227  coef = *pb + (((q31_t) w * (*px++)) >> 15);
228  *pb++ = (q15_t) __SSAT((coef), 16);
229  coef = *pb + (((q31_t) w * (*px++)) >> 15);
230  *pb++ = (q15_t) __SSAT((coef), 16);
231  coef = *pb + (((q31_t) w * (*px++)) >> 15);
232  *pb++ = (q15_t) __SSAT((coef), 16);
233 
234  /* Decrement the loop counter */
235  tapCnt--;
236  }
237 
238  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
239  tapCnt = numTaps % 0x4u;
240 
241  while(tapCnt > 0u)
242  {
243  /* Perform the multiply-accumulate */
244  coef = *pb + (((q31_t) w * (*px++)) >> 15);
245  *pb++ = (q15_t) __SSAT((coef), 16);
246 
247  /* Decrement the loop counter */
248  tapCnt--;
249  }
250 
251  /* Read the sample from state buffer */
252  x0 = *pState;
253 
254  /* Advance state pointer by 1 for the next sample */
255  pState = pState + 1u;
256 
257  /* Decrement the loop counter */
258  blkCnt--;
259  }
260 
261  /* Save energy and x0 values for the next frame */
262  S->energy = (q15_t) energy;
263  S->x0 = x0;
264 
265  /* Processing is complete. Now copy the last numTaps - 1 samples to the
266  satrt of the state buffer. This prepares the state buffer for the
267  next function call. */
268 
269  /* Points to the start of the pState buffer */
270  pStateCurnt = S->pState;
271 
272  /* Calculation of count for copying integer writes */
273  tapCnt = (numTaps - 1u) >> 2;
274 
275  while(tapCnt > 0u)
276  {
277 
278 #ifndef UNALIGNED_SUPPORT_DISABLE
279 
280  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
281  *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
282 
283 #else
284 
285  *pStateCurnt++ = *pState++;
286  *pStateCurnt++ = *pState++;
287  *pStateCurnt++ = *pState++;
288  *pStateCurnt++ = *pState++;
289 
290 #endif
291 
292  tapCnt--;
293 
294  }
295 
296  /* Calculation of count for remaining q15_t data */
297  tapCnt = (numTaps - 1u) % 0x4u;
298 
299  /* copy data */
300  while(tapCnt > 0u)
301  {
302  *pStateCurnt++ = *pState++;
303 
304  /* Decrement the loop counter */
305  tapCnt--;
306  }
307 
308 #else
309 
310  /* Run the below code for Cortex-M0 */
311 
312  while(blkCnt > 0u)
313  {
314  /* Copy the new input sample into the state buffer */
315  *pStateCurnt++ = *pSrc;
316 
317  /* Initialize pState pointer */
318  px = pState;
319 
320  /* Initialize pCoeffs pointer */
321  pb = pCoeffs;
322 
323  /* Read the sample from input buffer */
324  in = *pSrc++;
325 
326  /* Update the energy calculation */
327  energy -= (((q31_t) x0 * (x0)) >> 15);
328  energy += (((q31_t) in * (in)) >> 15);
329 
330  /* Set the accumulator to zero */
331  acc = 0;
332 
333  /* Loop over numTaps number of values */
334  tapCnt = numTaps;
335 
336  while(tapCnt > 0u)
337  {
338  /* Perform the multiply-accumulate */
339  acc += (((q31_t) * px++ * (*pb++)));
340 
341  /* Decrement the loop counter */
342  tapCnt--;
343  }
344 
345  /* Calc lower part of acc */
346  acc_l = acc & 0xffffffff;
347 
348  /* Calc upper part of acc */
349  acc_h = (acc >> 32) & 0xffffffff;
350 
351  /* Apply shift for lower part of acc and upper part of acc */
352  acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
353 
354  /* Converting the result to 1.15 format and saturate the output */
355  acc = __SSAT(acc, 16u);
356 
357  /* Converting the result to 1.15 format */
358  //acc = __SSAT((acc >> (16u - shift)), 16u);
359 
360  /* Store the result from accumulator into the destination buffer. */
361  *pOut++ = (q15_t) acc;
362 
363  /* Compute and store error */
364  d = *pRef++;
365  e = d - (q15_t) acc;
366  *pErr++ = e;
367 
368  /* Calculation of 1/energy */
369  postShift = arm_recip_q15((q15_t) energy + DELTA_Q15,
370  &oneByEnergy, S->recipTable);
371 
372  /* Calculation of e * mu value */
373  errorXmu = (q15_t) (((q31_t) e * mu) >> 15);
374 
375  /* Calculation of (e * mu) * (1/energy) value */
376  acc = (((q31_t) errorXmu * oneByEnergy) >> (15 - postShift));
377 
378  /* Weighting factor for the normalized version */
379  w = (q15_t) __SSAT((q31_t) acc, 16);
380 
381  /* Initialize pState pointer */
382  px = pState;
383 
384  /* Initialize coeff pointer */
385  pb = (pCoeffs);
386 
387  /* Loop over numTaps number of values */
388  tapCnt = numTaps;
389 
390  while(tapCnt > 0u)
391  {
392  /* Perform the multiply-accumulate */
393  coef = *pb + (((q31_t) w * (*px++)) >> 15);
394  *pb++ = (q15_t) __SSAT((coef), 16);
395 
396  /* Decrement the loop counter */
397  tapCnt--;
398  }
399 
400  /* Read the sample from state buffer */
401  x0 = *pState;
402 
403  /* Advance state pointer by 1 for the next sample */
404  pState = pState + 1u;
405 
406  /* Decrement the loop counter */
407  blkCnt--;
408  }
409 
410  /* Save energy and x0 values for the next frame */
411  S->energy = (q15_t) energy;
412  S->x0 = x0;
413 
414  /* Processing is complete. Now copy the last numTaps - 1 samples to the
415  satrt of the state buffer. This prepares the state buffer for the
416  next function call. */
417 
418  /* Points to the start of the pState buffer */
419  pStateCurnt = S->pState;
420 
421  /* copy (numTaps - 1u) data */
422  tapCnt = (numTaps - 1u);
423 
424  /* copy data */
425  while(tapCnt > 0u)
426  {
427  *pStateCurnt++ = *pState++;
428 
429  /* Decrement the loop counter */
430  tapCnt--;
431  }
432 
433 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
434 
435 }
436 
437 
#define DELTA_Q15
Definition: arm_math.h:331
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
void arm_lms_norm_q15(arm_lms_norm_instance_q15 *S, q15_t *pSrc, q15_t *pRef, q15_t *pOut, q15_t *pErr, uint32_t blockSize)
Processing function for Q15 normalized LMS filter.
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
Instance structure for the Q15 normalized LMS filter.
Definition: arm_math.h:4222
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397