STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_lms_norm_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_lms_norm_f32.c
9 *
10 * Description: Processing function for the floating-point Normalised LMS.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
175  float32_t * pSrc,
176  float32_t * pRef,
177  float32_t * pOut,
178  float32_t * pErr,
179  uint32_t blockSize)
180 {
181  float32_t *pState = S->pState; /* State pointer */
182  float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
183  float32_t *pStateCurnt; /* Points to the current sample of the state */
184  float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */
185  float32_t mu = S->mu; /* Adaptive factor */
186  uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */
187  uint32_t tapCnt, blkCnt; /* Loop counters */
188  float32_t energy; /* Energy of the input */
189  float32_t sum, e, d; /* accumulator, error, reference data sample */
190  float32_t w, x0, in; /* weight factor, temporary variable to hold input sample and state */
191 
192  /* Initialize the error, the reference sample and the coefficient-update weight */
193  e = 0.0f;
194  d = 0.0f;
195  w = 0.0f;
196 
197  energy = S->energy;
198  x0 = S->x0;
199 
200  /* S->pState points to buffer which contains previous frame (numTaps - 1) samples */
201  /* pStateCurnt points to the location where the new input data should be written */
202  pStateCurnt = &(S->pState[(numTaps - 1u)]);
203 
204  /* Loop over blockSize number of values */
205  blkCnt = blockSize;
206 
207 
208 #ifndef ARM_MATH_CM0_FAMILY
209 
210  /* Run the below code for Cortex-M4 and Cortex-M3 */
211 
212  while(blkCnt > 0u)
213  {
214  /* Copy the new input sample into the state buffer */
215  *pStateCurnt++ = *pSrc;
216 
217  /* Initialize pState pointer */
218  px = pState;
219 
220  /* Initialize coeff pointer */
221  pb = (pCoeffs);
222 
223  /* Read the sample from input buffer */
224  in = *pSrc++;
225 
226  /* Update the energy calculation */
227  energy -= x0 * x0;
228  energy += in * in;
229 
230  /* Set the accumulator to zero */
231  sum = 0.0f;
232 
233  /* Loop unrolling. Process 4 taps at a time. */
234  tapCnt = numTaps >> 2;
235 
236  while(tapCnt > 0u)
237  {
238  /* Perform the multiply-accumulate */
239  sum += (*px++) * (*pb++);
240  sum += (*px++) * (*pb++);
241  sum += (*px++) * (*pb++);
242  sum += (*px++) * (*pb++);
243 
244  /* Decrement the loop counter */
245  tapCnt--;
246  }
247 
248  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
249  tapCnt = numTaps % 0x4u;
250 
251  while(tapCnt > 0u)
252  {
253  /* Perform the multiply-accumulate */
254  sum += (*px++) * (*pb++);
255 
256  /* Decrement the loop counter */
257  tapCnt--;
258  }
259 
260  /* The result in the accumulator, store in the destination buffer. */
261  *pOut++ = sum;
262 
263  /* Compute and store error */
264  d = (float32_t) (*pRef++);
265  e = d - sum;
266  *pErr++ = e;
267 
268  /* Calculation of Weighting factor for updating filter coefficients */
269  /* epsilon value 0.000000119209289f */
270  w = (e * mu) / (energy + 0.000000119209289f);
271 
272  /* Initialize pState pointer */
273  px = pState;
274 
275  /* Initialize coeff pointer */
276  pb = (pCoeffs);
277 
278  /* Loop unrolling. Process 4 taps at a time. */
279  tapCnt = numTaps >> 2;
280 
281  /* Update filter coefficients */
282  while(tapCnt > 0u)
283  {
284  /* Perform the multiply-accumulate */
285  *pb += w * (*px++);
286  pb++;
287 
288  *pb += w * (*px++);
289  pb++;
290 
291  *pb += w * (*px++);
292  pb++;
293 
294  *pb += w * (*px++);
295  pb++;
296 
297 
298  /* Decrement the loop counter */
299  tapCnt--;
300  }
301 
302  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
303  tapCnt = numTaps % 0x4u;
304 
305  while(tapCnt > 0u)
306  {
307  /* Perform the multiply-accumulate */
308  *pb += w * (*px++);
309  pb++;
310 
311  /* Decrement the loop counter */
312  tapCnt--;
313  }
314 
315  x0 = *pState;
316 
317  /* Advance state pointer by 1 for the next sample */
318  pState = pState + 1;
319 
320  /* Decrement the loop counter */
321  blkCnt--;
322  }
323 
324  S->energy = energy;
325  S->x0 = x0;
326 
327  /* Processing is complete. Now copy the last numTaps - 1 samples to the
328  start of the state buffer. This prepares the state buffer for the
329  next function call. */
330 
331  /* Points to the start of the pState buffer */
332  pStateCurnt = S->pState;
333 
334  /* Loop unrolling for (numTaps - 1u)/4 samples copy */
335  tapCnt = (numTaps - 1u) >> 2u;
336 
337  /* copy data */
338  while(tapCnt > 0u)
339  {
340  *pStateCurnt++ = *pState++;
341  *pStateCurnt++ = *pState++;
342  *pStateCurnt++ = *pState++;
343  *pStateCurnt++ = *pState++;
344 
345  /* Decrement the loop counter */
346  tapCnt--;
347  }
348 
349  /* Calculate remaining number of copies */
350  tapCnt = (numTaps - 1u) % 0x4u;
351 
352  /* Copy the remaining float32_t data */
353  while(tapCnt > 0u)
354  {
355  *pStateCurnt++ = *pState++;
356 
357  /* Decrement the loop counter */
358  tapCnt--;
359  }
360 
361 #else
362 
363  /* Run the below code for Cortex-M0 */
364 
365  while(blkCnt > 0u)
366  {
367  /* Copy the new input sample into the state buffer */
368  *pStateCurnt++ = *pSrc;
369 
370  /* Initialize pState pointer */
371  px = pState;
372 
373  /* Initialize pCoeffs pointer */
374  pb = pCoeffs;
375 
376  /* Read the sample from input buffer */
377  in = *pSrc++;
378 
379  /* Update the energy calculation */
380  energy -= x0 * x0;
381  energy += in * in;
382 
383  /* Set the accumulator to zero */
384  sum = 0.0f;
385 
386  /* Loop over numTaps number of values */
387  tapCnt = numTaps;
388 
389  while(tapCnt > 0u)
390  {
391  /* Perform the multiply-accumulate */
392  sum += (*px++) * (*pb++);
393 
394  /* Decrement the loop counter */
395  tapCnt--;
396  }
397 
398  /* The result in the accumulator is stored in the destination buffer. */
399  *pOut++ = sum;
400 
401  /* Compute and store error */
402  d = (float32_t) (*pRef++);
403  e = d - sum;
404  *pErr++ = e;
405 
406  /* Calculation of Weighting factor for updating filter coefficients */
407  /* epsilon value 0.000000119209289f */
408  w = (e * mu) / (energy + 0.000000119209289f);
409 
410  /* Initialize pState pointer */
411  px = pState;
412 
413  /* Initialize pCoeffs pointer */
414  pb = pCoeffs;
415 
416  /* Loop over numTaps number of values */
417  tapCnt = numTaps;
418 
419  while(tapCnt > 0u)
420  {
421  /* Perform the multiply-accumulate */
422  *pb += w * (*px++);
423  pb++;
424 
425  /* Decrement the loop counter */
426  tapCnt--;
427  }
428 
429  x0 = *pState;
430 
431  /* Advance state pointer by 1 for the next sample */
432  pState = pState + 1;
433 
434  /* Decrement the loop counter */
435  blkCnt--;
436  }
437 
438  S->energy = energy;
439  S->x0 = x0;
440 
441  /* Processing is complete. Now copy the last numTaps - 1 samples to the
442  start of the state buffer. This prepares the state buffer for the
443  next function call. */
444 
445  /* Points to the start of the pState buffer */
446  pStateCurnt = S->pState;
447 
448  /* Copy (numTaps - 1u) samples */
449  tapCnt = (numTaps - 1u);
450 
451  /* Copy the float32_t state samples */
452  while(tapCnt > 0u)
453  {
454  *pStateCurnt++ = *pState++;
455 
456  /* Decrement the loop counter */
457  tapCnt--;
458  }
459 
460 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
461 
462 }
463 
void arm_lms_norm_f32(arm_lms_norm_instance_f32 *S, float32_t *pSrc, float32_t *pRef, float32_t *pOut, float32_t *pErr, uint32_t blockSize)
Processing function for floating-point normalized LMS filter.
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
Instance structure for the floating-point normalized LMS filter.
Definition: arm_math.h:4118