STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_lms_norm_q31.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_lms_norm_q31.c
9 *
10 * Description: Processing function for the Q31 NLMS filter.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
82  q31_t * pSrc,
83  q31_t * pRef,
84  q31_t * pOut,
85  q31_t * pErr,
86  uint32_t blockSize)
87 {
88  q31_t *pState = S->pState; /* State pointer */
89  q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
90  q31_t *pStateCurnt; /* Points to the current sample of the state */
91  q31_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
92  q31_t mu = S->mu; /* Adaptive factor */
93  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
94  uint32_t tapCnt, blkCnt; /* Loop counters */
95  q63_t energy; /* Energy of the input */
96  q63_t acc; /* Accumulator */
97  q31_t e = 0, d = 0; /* error, reference data sample */
98  q31_t w = 0, in; /* weight factor and state */
99  q31_t x0; /* temporary variable to hold input sample */
100 // uint32_t shift = 32u - ((uint32_t) S->postShift + 1u); /* Shift to be applied to the output */
101  q31_t errorXmu, oneByEnergy; /* Temporary variables to store error and mu product and reciprocal of energy */
102  q31_t postShift; /* Post shift to be applied to weight after reciprocal calculation */
103  q31_t coef; /* Temporary variable for coef */
104  q31_t acc_l, acc_h; /* temporary input */
105  uint32_t uShift = ((uint32_t) S->postShift + 1u);
106  uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
107 
108  energy = S->energy;
109  x0 = S->x0;
110 
111  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
112  /* pStateCurnt points to the location where the new input data should be written */
113  pStateCurnt = &(S->pState[(numTaps - 1u)]);
114 
115  /* Loop over blockSize number of values */
116  blkCnt = blockSize;
117 
118 
119 #ifndef ARM_MATH_CM0_FAMILY
120 
121  /* Run the below code for Cortex-M4 and Cortex-M3 */
122 
123  while(blkCnt > 0u)
124  {
125 
126  /* Copy the new input sample into the state buffer */
127  *pStateCurnt++ = *pSrc;
128 
129  /* Initialize pState pointer */
130  px = pState;
131 
132  /* Initialize coeff pointer */
133  pb = (pCoeffs);
134 
135  /* Read the sample from input buffer */
136  in = *pSrc++;
137 
138  /* Update the energy calculation */
139  energy = (q31_t) ((((q63_t) energy << 32) -
140  (((q63_t) x0 * x0) << 1)) >> 32);
141  energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32);
142 
143  /* Set the accumulator to zero */
144  acc = 0;
145 
146  /* Loop unrolling. Process 4 taps at a time. */
147  tapCnt = numTaps >> 2;
148 
149  while(tapCnt > 0u)
150  {
151  /* Perform the multiply-accumulate */
152  acc += ((q63_t) (*px++)) * (*pb++);
153  acc += ((q63_t) (*px++)) * (*pb++);
154  acc += ((q63_t) (*px++)) * (*pb++);
155  acc += ((q63_t) (*px++)) * (*pb++);
156 
157  /* Decrement the loop counter */
158  tapCnt--;
159  }
160 
161  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
162  tapCnt = numTaps % 0x4u;
163 
164  while(tapCnt > 0u)
165  {
166  /* Perform the multiply-accumulate */
167  acc += ((q63_t) (*px++)) * (*pb++);
168 
169  /* Decrement the loop counter */
170  tapCnt--;
171  }
172 
173  /* Converting the result to 1.31 format */
174  /* Calc lower part of acc */
175  acc_l = acc & 0xffffffff;
176 
177  /* Calc upper part of acc */
178  acc_h = (acc >> 32) & 0xffffffff;
179 
180  acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
181 
182  /* Store the result from accumulator into the destination buffer. */
183  *pOut++ = (q31_t) acc;
184 
185  /* Compute and store error */
186  d = *pRef++;
187  e = d - (q31_t) acc;
188  *pErr++ = e;
189 
190  /* Calculates the reciprocal of energy */
191  postShift = arm_recip_q31(energy + DELTA_Q31,
192  &oneByEnergy, &S->recipTable[0]);
193 
194  /* Calculation of product of (e * mu) */
195  errorXmu = (q31_t) (((q63_t) e * mu) >> 31);
196 
197  /* Weighting factor for the normalized version */
198  w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift));
199 
200  /* Initialize pState pointer */
201  px = pState;
202 
203  /* Initialize coeff pointer */
204  pb = (pCoeffs);
205 
206  /* Loop unrolling. Process 4 taps at a time. */
207  tapCnt = numTaps >> 2;
208 
209  /* Update filter coefficients */
210  while(tapCnt > 0u)
211  {
212  /* Perform the multiply-accumulate */
213 
214  /* coef is in 2.30 format */
215  coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
216  /* get coef in 1.31 format by left shifting */
217  *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
218  /* update coefficient buffer to next coefficient */
219  pb++;
220 
221  coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
222  *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
223  pb++;
224 
225  coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
226  *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
227  pb++;
228 
229  coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
230  *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
231  pb++;
232 
233  /* Decrement the loop counter */
234  tapCnt--;
235  }
236 
237  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
238  tapCnt = numTaps % 0x4u;
239 
240  while(tapCnt > 0u)
241  {
242  /* Perform the multiply-accumulate */
243  coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
244  *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
245  pb++;
246 
247  /* Decrement the loop counter */
248  tapCnt--;
249  }
250 
251  /* Read the sample from state buffer */
252  x0 = *pState;
253 
254  /* Advance state pointer by 1 for the next sample */
255  pState = pState + 1;
256 
257  /* Decrement the loop counter */
258  blkCnt--;
259  }
260 
261  /* Save energy and x0 values for the next frame */
262  S->energy = (q31_t) energy;
263  S->x0 = x0;
264 
265  /* Processing is complete. Now copy the last numTaps - 1 samples to the
266  satrt of the state buffer. This prepares the state buffer for the
267  next function call. */
268 
269  /* Points to the start of the pState buffer */
270  pStateCurnt = S->pState;
271 
272  /* Loop unrolling for (numTaps - 1u) samples copy */
273  tapCnt = (numTaps - 1u) >> 2u;
274 
275  /* copy data */
276  while(tapCnt > 0u)
277  {
278  *pStateCurnt++ = *pState++;
279  *pStateCurnt++ = *pState++;
280  *pStateCurnt++ = *pState++;
281  *pStateCurnt++ = *pState++;
282 
283  /* Decrement the loop counter */
284  tapCnt--;
285  }
286 
287  /* Calculate remaining number of copies */
288  tapCnt = (numTaps - 1u) % 0x4u;
289 
290  /* Copy the remaining q31_t data */
291  while(tapCnt > 0u)
292  {
293  *pStateCurnt++ = *pState++;
294 
295  /* Decrement the loop counter */
296  tapCnt--;
297  }
298 
299 #else
300 
301  /* Run the below code for Cortex-M0 */
302 
303  while(blkCnt > 0u)
304  {
305 
306  /* Copy the new input sample into the state buffer */
307  *pStateCurnt++ = *pSrc;
308 
309  /* Initialize pState pointer */
310  px = pState;
311 
312  /* Initialize pCoeffs pointer */
313  pb = pCoeffs;
314 
315  /* Read the sample from input buffer */
316  in = *pSrc++;
317 
318  /* Update the energy calculation */
319  energy =
320  (q31_t) ((((q63_t) energy << 32) - (((q63_t) x0 * x0) << 1)) >> 32);
321  energy = (q31_t) (((((q63_t) in * in) << 1) + (energy << 32)) >> 32);
322 
323  /* Set the accumulator to zero */
324  acc = 0;
325 
326  /* Loop over numTaps number of values */
327  tapCnt = numTaps;
328 
329  while(tapCnt > 0u)
330  {
331  /* Perform the multiply-accumulate */
332  acc += ((q63_t) (*px++)) * (*pb++);
333 
334  /* Decrement the loop counter */
335  tapCnt--;
336  }
337 
338  /* Converting the result to 1.31 format */
339  /* Converting the result to 1.31 format */
340  /* Calc lower part of acc */
341  acc_l = acc & 0xffffffff;
342 
343  /* Calc upper part of acc */
344  acc_h = (acc >> 32) & 0xffffffff;
345 
346  acc = (uint32_t) acc_l >> lShift | acc_h << uShift;
347 
348 
349  //acc = (q31_t) (acc >> shift);
350 
351  /* Store the result from accumulator into the destination buffer. */
352  *pOut++ = (q31_t) acc;
353 
354  /* Compute and store error */
355  d = *pRef++;
356  e = d - (q31_t) acc;
357  *pErr++ = e;
358 
359  /* Calculates the reciprocal of energy */
360  postShift =
361  arm_recip_q31(energy + DELTA_Q31, &oneByEnergy, &S->recipTable[0]);
362 
363  /* Calculation of product of (e * mu) */
364  errorXmu = (q31_t) (((q63_t) e * mu) >> 31);
365 
366  /* Weighting factor for the normalized version */
367  w = clip_q63_to_q31(((q63_t) errorXmu * oneByEnergy) >> (31 - postShift));
368 
369  /* Initialize pState pointer */
370  px = pState;
371 
372  /* Initialize coeff pointer */
373  pb = (pCoeffs);
374 
375  /* Loop over numTaps number of values */
376  tapCnt = numTaps;
377 
378  while(tapCnt > 0u)
379  {
380  /* Perform the multiply-accumulate */
381  /* coef is in 2.30 format */
382  coef = (q31_t) (((q63_t) w * (*px++)) >> (32));
383  /* get coef in 1.31 format by left shifting */
384  *pb = clip_q63_to_q31((q63_t) * pb + (coef << 1u));
385  /* update coefficient buffer to next coefficient */
386  pb++;
387 
388  /* Decrement the loop counter */
389  tapCnt--;
390  }
391 
392  /* Read the sample from state buffer */
393  x0 = *pState;
394 
395  /* Advance state pointer by 1 for the next sample */
396  pState = pState + 1;
397 
398  /* Decrement the loop counter */
399  blkCnt--;
400  }
401 
402  /* Save energy and x0 values for the next frame */
403  S->energy = (q31_t) energy;
404  S->x0 = x0;
405 
406  /* Processing is complete. Now copy the last numTaps - 1 samples to the
407  start of the state buffer. This prepares the state buffer for the
408  next function call. */
409 
410  /* Points to the start of the pState buffer */
411  pStateCurnt = S->pState;
412 
413  /* Loop for (numTaps - 1u) samples copy */
414  tapCnt = (numTaps - 1u);
415 
416  /* Copy the remaining q31_t data */
417  while(tapCnt > 0u)
418  {
419  *pStateCurnt++ = *pState++;
420 
421  /* Decrement the loop counter */
422  tapCnt--;
423  }
424 
425 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
426 
427 }
428 
#define DELTA_Q31
Macros required for reciprocal calculation in Normalized LMS.
Definition: arm_math.h:330
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
void arm_lms_norm_q31(arm_lms_norm_instance_q31 *S, q31_t *pSrc, q31_t *pRef, q31_t *pOut, q31_t *pErr, uint32_t blockSize)
Processing function for Q31 normalized LMS filter.
Instance structure for the Q31 normalized LMS filter.
Definition: arm_math.h:4168
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397