STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_conv_partial_fast_opt_q15.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_conv_partial_fast_opt_q15.c
9 *
10 * Description: Fast Q15 Partial convolution.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
73 #ifndef UNALIGNED_SUPPORT_DISABLE
74 
75 arm_status arm_conv_partial_fast_opt_q15(
76  q15_t * pSrcA,
77  uint32_t srcALen,
78  q15_t * pSrcB,
79  uint32_t srcBLen,
80  q15_t * pDst,
81  uint32_t firstIndex,
82  uint32_t numPoints,
83  q15_t * pScratch1,
84  q15_t * pScratch2)
85 {
86 
87  q15_t *pOut = pDst; /* output pointer */
88  q15_t *pScr1 = pScratch1; /* Temporary pointer for scratch1 */
89  q15_t *pScr2 = pScratch2; /* Temporary pointer for scratch2 */
90  q31_t acc0, acc1, acc2, acc3; /* Accumulator */
91  q31_t x1, x2, x3; /* Temporary variables to hold state and coefficient values */
92  q31_t y1, y2; /* Temporary variables to hold samples read from scratch2 */
93  q15_t *pIn1; /* inputA pointer */
94  q15_t *pIn2; /* inputB pointer */
95  q15_t *px; /* Intermediate inputA pointer */
96  q15_t *py; /* Intermediate inputB pointer */
97  uint32_t j, k, blkCnt; /* loop counter */
98  arm_status status;
99 
100  uint32_t tapCnt; /* loop count */
101 
102  /* Check for range of output samples to be calculated */
103  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
104  {
105  /* Set status as ARM_MATH_ARGUMENT_ERROR */
106  status = ARM_MATH_ARGUMENT_ERROR;
107  }
108  else
109  {
110 
111  /* The algorithm implementation is based on the lengths of the inputs. */
112  /* srcB is always made to slide across srcA. */
113  /* So srcBLen is always considered as shorter or equal to srcALen */
114  if(srcALen >= srcBLen)
115  {
116  /* Initialization of inputA pointer */
117  pIn1 = pSrcA;
118 
119  /* Initialization of inputB pointer */
120  pIn2 = pSrcB;
121  }
122  else
123  {
124  /* Initialization of inputA pointer */
125  pIn1 = pSrcB;
126 
127  /* Initialization of inputB pointer */
128  pIn2 = pSrcA;
129 
130  /* srcBLen is always considered as shorter or equal to srcALen */
131  j = srcBLen;
132  srcBLen = srcALen;
133  srcALen = j;
134  }
135 
136  /* Temporary pointer for scratch2 */
137  py = pScratch2;
138 
139  /* pointer to take end of scratch2 buffer */
140  pScr2 = pScratch2 + srcBLen - 1;
141 
142  /* points to smaller length sequence */
143  px = pIn2;
144 
145  /* Apply loop unrolling and do 4 Copies simultaneously. */
146  k = srcBLen >> 2u;
147 
148  /* First part of the processing with loop unrolling copies 4 data points at a time.
149  ** a second loop below copies for the remaining 1 to 3 samples. */
150 
151  /* Copy smaller length input sequence in reverse order into second scratch buffer */
152  while(k > 0u)
153  {
154  /* copy second buffer in reversal manner */
155  *pScr2-- = *px++;
156  *pScr2-- = *px++;
157  *pScr2-- = *px++;
158  *pScr2-- = *px++;
159 
160  /* Decrement the loop counter */
161  k--;
162  }
163 
164  /* If the count is not a multiple of 4, copy remaining samples here.
165  ** No loop unrolling is used. */
166  k = srcBLen % 0x4u;
167 
168  while(k > 0u)
169  {
170  /* copy second buffer in reversal manner for remaining samples */
171  *pScr2-- = *px++;
172 
173  /* Decrement the loop counter */
174  k--;
175  }
176 
177  /* Initialize temporary scratch pointer */
178  pScr1 = pScratch1;
179 
180  /* Assuming scratch1 buffer is aligned by 32-bit */
181  /* Fill (srcBLen - 1u) zeros in scratch buffer */
182  arm_fill_q15(0, pScr1, (srcBLen - 1u));
183 
184  /* Update temporary scratch pointer */
185  pScr1 += (srcBLen - 1u);
186 
187  /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
188 
189  /* Copy (srcALen) samples in scratch buffer */
190  arm_copy_q15(pIn1, pScr1, srcALen);
191 
192  /* Update pointers */
193  pScr1 += srcALen;
194 
195  /* Fill (srcBLen - 1u) zeros at end of scratch buffer */
196  arm_fill_q15(0, pScr1, (srcBLen - 1u));
197 
198  /* Update pointer */
199  pScr1 += (srcBLen - 1u);
200 
201  /* Initialization of pIn2 pointer */
202  pIn2 = py;
203 
204  pScratch1 += firstIndex;
205 
206  pOut = pDst + firstIndex;
207 
208  /* First part of the processing with loop unrolling process 4 data points at a time.
209  ** a second loop below process for the remaining 1 to 3 samples. */
210 
211  /* Actual convolution process starts here */
212  blkCnt = (numPoints) >> 2;
213 
214  while(blkCnt > 0)
215  {
216  /* Initialize temporary scratch pointer as scratch1 */
217  pScr1 = pScratch1;
218 
219  /* Clear accumulators */
220  acc0 = 0;
221  acc1 = 0;
222  acc2 = 0;
223  acc3 = 0;
224 
225  /* Read two samples from scratch1 buffer */
226  x1 = *__SIMD32(pScr1)++;
227 
228  /* Read next two samples from scratch1 buffer */
229  x2 = *__SIMD32(pScr1)++;
230 
231  tapCnt = (srcBLen) >> 2u;
232 
233  while(tapCnt > 0u)
234  {
235 
236  /* Read four samples from smaller buffer */
237  y1 = _SIMD32_OFFSET(pIn2);
238  y2 = _SIMD32_OFFSET(pIn2 + 2u);
239 
240  /* multiply and accumulate */
241  acc0 = __SMLAD(x1, y1, acc0);
242  acc2 = __SMLAD(x2, y1, acc2);
243 
244  /* pack input data */
245 #ifndef ARM_MATH_BIG_ENDIAN
246  x3 = __PKHBT(x2, x1, 0);
247 #else
248  x3 = __PKHBT(x1, x2, 0);
249 #endif
250 
251  /* multiply and accumulate */
252  acc1 = __SMLADX(x3, y1, acc1);
253 
254  /* Read next two samples from scratch1 buffer */
255  x1 = _SIMD32_OFFSET(pScr1);
256 
257  /* multiply and accumulate */
258  acc0 = __SMLAD(x2, y2, acc0);
259 
260  acc2 = __SMLAD(x1, y2, acc2);
261 
262  /* pack input data */
263 #ifndef ARM_MATH_BIG_ENDIAN
264  x3 = __PKHBT(x1, x2, 0);
265 #else
266  x3 = __PKHBT(x2, x1, 0);
267 #endif
268 
269  acc3 = __SMLADX(x3, y1, acc3);
270  acc1 = __SMLADX(x3, y2, acc1);
271 
272  x2 = _SIMD32_OFFSET(pScr1 + 2u);
273 
274 #ifndef ARM_MATH_BIG_ENDIAN
275  x3 = __PKHBT(x2, x1, 0);
276 #else
277  x3 = __PKHBT(x1, x2, 0);
278 #endif
279 
280  acc3 = __SMLADX(x3, y2, acc3);
281 
282  /* update scratch pointers */
283  pIn2 += 4u;
284  pScr1 += 4u;
285 
286 
287  /* Decrement the loop counter */
288  tapCnt--;
289  }
290 
291  /* Update scratch pointer for remaining samples of smaller length sequence */
292  pScr1 -= 4u;
293 
294  /* apply same above for remaining samples of smaller length sequence */
295  tapCnt = (srcBLen) & 3u;
296 
297  while(tapCnt > 0u)
298  {
299 
300  /* accumulate the results */
301  acc0 += (*pScr1++ * *pIn2);
302  acc1 += (*pScr1++ * *pIn2);
303  acc2 += (*pScr1++ * *pIn2);
304  acc3 += (*pScr1++ * *pIn2++);
305 
306  pScr1 -= 3u;
307 
308  /* Decrement the loop counter */
309  tapCnt--;
310  }
311 
312  blkCnt--;
313 
314 
315  /* Store the results in the accumulators in the destination buffer. */
316 
317 #ifndef ARM_MATH_BIG_ENDIAN
318 
319  *__SIMD32(pOut)++ =
320  __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
321  *__SIMD32(pOut)++ =
322  __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
323 
324 #else
325 
326  *__SIMD32(pOut)++ =
327  __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
328  *__SIMD32(pOut)++ =
329  __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
330 
331 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
332 
333  /* Initialization of inputB pointer */
334  pIn2 = py;
335 
336  pScratch1 += 4u;
337 
338  }
339 
340 
341  blkCnt = numPoints & 0x3;
342 
343  /* Calculate the remaining (numPoints % 4) output samples one at a time */
344  while(blkCnt > 0)
345  {
346  /* Initialize temporary scratch pointer as scratch1 */
347  pScr1 = pScratch1;
348 
349  /* Clear accumulator */
350  acc0 = 0;
351 
352  tapCnt = (srcBLen) >> 1u;
353 
354  while(tapCnt > 0u)
355  {
356 
357  /* Read next two samples from scratch1 buffer */
358  x1 = *__SIMD32(pScr1)++;
359 
360  /* Read two samples from smaller buffer */
361  y1 = *__SIMD32(pIn2)++;
362 
363  acc0 = __SMLAD(x1, y1, acc0);
364 
365  /* Decrement the loop counter */
366  tapCnt--;
367  }
368 
369  tapCnt = (srcBLen) & 1u;
370 
371  /* apply same above for remaining samples of smaller length sequence */
372  while(tapCnt > 0u)
373  {
374 
375  /* accumulate the results */
376  acc0 += (*pScr1++ * *pIn2++);
377 
378  /* Decrement the loop counter */
379  tapCnt--;
380  }
381 
382  blkCnt--;
383 
384  /* The result is in 2.30 format. Convert to 1.15 with saturation.
385  ** Then store the output in the destination buffer. */
386  *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
387 
388  /* Initialization of inputB pointer */
389  pIn2 = py;
390 
391  pScratch1 += 1u;
392 
393  }
394  /* set status as ARM_MATH_SUCCESS */
395  status = ARM_MATH_SUCCESS;
396  }
397  /* Return to application */
398  return (status);
399 }
400 
401 #else
402 
403 arm_status arm_conv_partial_fast_opt_q15(
404  q15_t * pSrcA,
405  uint32_t srcALen,
406  q15_t * pSrcB,
407  uint32_t srcBLen,
408  q15_t * pDst,
409  uint32_t firstIndex,
410  uint32_t numPoints,
411  q15_t * pScratch1,
412  q15_t * pScratch2)
413 {
414 
415  q15_t *pOut = pDst; /* output pointer */
416  q15_t *pScr1 = pScratch1; /* Temporary pointer for scratch1 */
417  q15_t *pScr2 = pScratch2; /* Temporary pointer for scratch2 */
418  q31_t acc0, acc1, acc2, acc3; /* Accumulator */
419  q15_t *pIn1; /* inputA pointer */
420  q15_t *pIn2; /* inputB pointer */
421  q15_t *px; /* Intermediate inputA pointer */
422  q15_t *py; /* Intermediate inputB pointer */
423  uint32_t j, k, blkCnt; /* loop counter */
424  arm_status status; /* Status variable */
425  uint32_t tapCnt; /* loop count */
426  q15_t x10, x11, x20, x21; /* Temporary variables to hold srcA buffer */
427  q15_t y10, y11; /* Temporary variables to hold srcB buffer */
428 
429 
430  /* Check for range of output samples to be calculated */
431  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
432  {
433  /* Set status as ARM_MATH_ARGUMENT_ERROR */
434  status = ARM_MATH_ARGUMENT_ERROR;
435  }
436  else
437  {
438 
439  /* The algorithm implementation is based on the lengths of the inputs. */
440  /* srcB is always made to slide across srcA. */
441  /* So srcBLen is always considered as shorter or equal to srcALen */
442  if(srcALen >= srcBLen)
443  {
444  /* Initialization of inputA pointer */
445  pIn1 = pSrcA;
446 
447  /* Initialization of inputB pointer */
448  pIn2 = pSrcB;
449  }
450  else
451  {
452  /* Initialization of inputA pointer */
453  pIn1 = pSrcB;
454 
455  /* Initialization of inputB pointer */
456  pIn2 = pSrcA;
457 
458  /* srcBLen is always considered as shorter or equal to srcALen */
459  j = srcBLen;
460  srcBLen = srcALen;
461  srcALen = j;
462  }
463 
464  /* Temporary pointer for scratch2 */
465  py = pScratch2;
466 
467  /* pointer to take end of scratch2 buffer */
468  pScr2 = pScratch2 + srcBLen - 1;
469 
470  /* points to smaller length sequence */
471  px = pIn2;
472 
473  /* Apply loop unrolling and do 4 Copies simultaneously. */
474  k = srcBLen >> 2u;
475 
476  /* First part of the processing with loop unrolling copies 4 data points at a time.
477  ** a second loop below copies for the remaining 1 to 3 samples. */
478  while(k > 0u)
479  {
480  /* copy second buffer in reversal manner */
481  *pScr2-- = *px++;
482  *pScr2-- = *px++;
483  *pScr2-- = *px++;
484  *pScr2-- = *px++;
485 
486  /* Decrement the loop counter */
487  k--;
488  }
489 
490  /* If the count is not a multiple of 4, copy remaining samples here.
491  ** No loop unrolling is used. */
492  k = srcBLen % 0x4u;
493 
494  while(k > 0u)
495  {
496  /* copy second buffer in reversal manner for remaining samples */
497  *pScr2-- = *px++;
498 
499  /* Decrement the loop counter */
500  k--;
501  }
502 
503  /* Initialize temporary scratch pointer */
504  pScr1 = pScratch1;
505 
506  /* Fill (srcBLen - 1u) zeros in scratch buffer */
507  arm_fill_q15(0, pScr1, (srcBLen - 1u));
508 
509  /* Update temporary scratch pointer */
510  pScr1 += (srcBLen - 1u);
511 
512  /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
513 
514 
515  /* Apply loop unrolling and do 4 Copies simultaneously. */
516  k = srcALen >> 2u;
517 
518  /* First part of the processing with loop unrolling copies 4 data points at a time.
519  ** a second loop below copies for the remaining 1 to 3 samples. */
520  while(k > 0u)
521  {
522  /* copy the longer sequence into scratch1 in forward order */
523  *pScr1++ = *pIn1++;
524  *pScr1++ = *pIn1++;
525  *pScr1++ = *pIn1++;
526  *pScr1++ = *pIn1++;
527 
528  /* Decrement the loop counter */
529  k--;
530  }
531 
532  /* If the count is not a multiple of 4, copy remaining samples here.
533  ** No loop unrolling is used. */
534  k = srcALen % 0x4u;
535 
536  while(k > 0u)
537  {
538  /* copy the remaining samples of the longer sequence into scratch1 */
539  *pScr1++ = *pIn1++;
540 
541  /* Decrement the loop counter */
542  k--;
543  }
544 
545 
546  /* Fill (srcBLen - 1u) zeros at end of scratch1 buffer, 4 samples per iteration. */
547  k = (srcBLen - 1u) >> 2u;
548 
549  /* First part of the processing with loop unrolling writes 4 zeros at a time.
550  ** a second loop below writes the remaining 0 to 3 zeros. */
551  while(k > 0u)
552  {
553  /* write zeros after the copied sequence */
554  *pScr1++ = 0;
555  *pScr1++ = 0;
556  *pScr1++ = 0;
557  *pScr1++ = 0;
558 
559  /* Decrement the loop counter */
560  k--;
561  }
562 
563  /* If the count is not a multiple of 4, write the remaining zeros here.
564  ** No loop unrolling is used. */
565  k = (srcBLen - 1u) % 0x4u;
566 
567  while(k > 0u)
568  {
569  /* write the remaining zeros */
570  *pScr1++ = 0;
571 
572  /* Decrement the loop counter */
573  k--;
574  }
575 
576 
577  /* Initialization of pIn2 pointer */
578  pIn2 = py;
579 
580  pScratch1 += firstIndex;
581 
582  pOut = pDst + firstIndex;
583 
584  /* Actual convolution process starts here */
585  blkCnt = (numPoints) >> 2;
586 
587  while(blkCnt > 0)
588  {
589  /* Initialize temporary scratch pointer as scratch1 */
590  pScr1 = pScratch1;
591 
592  /* Clear accumulators */
593  acc0 = 0;
594  acc1 = 0;
595  acc2 = 0;
596  acc3 = 0;
597 
598  /* Read two samples from scratch1 buffer */
599  x10 = *pScr1++;
600  x11 = *pScr1++;
601 
602  /* Read next two samples from scratch1 buffer */
603  x20 = *pScr1++;
604  x21 = *pScr1++;
605 
606  tapCnt = (srcBLen) >> 2u;
607 
608  while(tapCnt > 0u)
609  {
610 
611  /* Read two samples from smaller buffer */
612  y10 = *pIn2;
613  y11 = *(pIn2 + 1u);
614 
615  /* multiply and accumulate */
616  acc0 += (q31_t) x10 *y10;
617  acc0 += (q31_t) x11 *y11;
618  acc2 += (q31_t) x20 *y10;
619  acc2 += (q31_t) x21 *y11;
620 
621  /* multiply and accumulate */
622  acc1 += (q31_t) x11 *y10;
623  acc1 += (q31_t) x20 *y11;
624 
625  /* Read next two samples from scratch1 buffer */
626  x10 = *pScr1;
627  x11 = *(pScr1 + 1u);
628 
629  /* multiply and accumulate */
630  acc3 += (q31_t) x21 *y10;
631  acc3 += (q31_t) x10 *y11;
632 
633  /* Read next two samples from scratch2 buffer */
634  y10 = *(pIn2 + 2u);
635  y11 = *(pIn2 + 3u);
636 
637  /* multiply and accumulate */
638  acc0 += (q31_t) x20 *y10;
639  acc0 += (q31_t) x21 *y11;
640  acc2 += (q31_t) x10 *y10;
641  acc2 += (q31_t) x11 *y11;
642  acc1 += (q31_t) x21 *y10;
643  acc1 += (q31_t) x10 *y11;
644 
645  /* Read next two samples from scratch1 buffer */
646  x20 = *(pScr1 + 2);
647  x21 = *(pScr1 + 3);
648 
649  /* multiply and accumulate */
650  acc3 += (q31_t) x11 *y10;
651  acc3 += (q31_t) x20 *y11;
652 
653  /* update scratch pointers */
654  pIn2 += 4u;
655  pScr1 += 4u;
656 
657  /* Decrement the loop counter */
658  tapCnt--;
659  }
660 
661  /* Update scratch pointer for remaining samples of smaller length sequence */
662  pScr1 -= 4u;
663 
664  /* apply same above for remaining samples of smaller length sequence */
665  tapCnt = (srcBLen) & 3u;
666 
667  while(tapCnt > 0u)
668  {
669  /* accumulate the results */
670  acc0 += (*pScr1++ * *pIn2);
671  acc1 += (*pScr1++ * *pIn2);
672  acc2 += (*pScr1++ * *pIn2);
673  acc3 += (*pScr1++ * *pIn2++);
674 
675  pScr1 -= 3u;
676 
677  /* Decrement the loop counter */
678  tapCnt--;
679  }
680 
681  blkCnt--;
682 
683 
684  /* Store the results in the accumulators in the destination buffer. */
685  *pOut++ = __SSAT((acc0 >> 15), 16);
686  *pOut++ = __SSAT((acc1 >> 15), 16);
687  *pOut++ = __SSAT((acc2 >> 15), 16);
688  *pOut++ = __SSAT((acc3 >> 15), 16);
689 
690  /* Initialization of inputB pointer */
691  pIn2 = py;
692 
693  pScratch1 += 4u;
694 
695  }
696 
697 
698  blkCnt = numPoints & 0x3;
699 
700  /* Calculate the remaining (numPoints % 4) output samples one at a time */
701  while(blkCnt > 0)
702  {
703  /* Initialize temporary scratch pointer as scratch1 */
704  pScr1 = pScratch1;
705 
706  /* Clear accumulator */
707  acc0 = 0;
708 
709  tapCnt = (srcBLen) >> 1u;
710 
711  while(tapCnt > 0u)
712  {
713 
714  /* Read next two samples from scratch1 buffer */
715  x10 = *pScr1++;
716  x11 = *pScr1++;
717 
718  /* Read two samples from smaller buffer */
719  y10 = *pIn2++;
720  y11 = *pIn2++;
721 
722  /* multiply and accumulate */
723  acc0 += (q31_t) x10 *y10;
724  acc0 += (q31_t) x11 *y11;
725 
726  /* Decrement the loop counter */
727  tapCnt--;
728  }
729 
730  tapCnt = (srcBLen) & 1u;
731 
732  /* apply same above for remaining samples of smaller length sequence */
733  while(tapCnt > 0u)
734  {
735 
736  /* accumulate the results */
737  acc0 += (*pScr1++ * *pIn2++);
738 
739  /* Decrement the loop counter */
740  tapCnt--;
741  }
742 
743  blkCnt--;
744 
745  /* Store the result in the accumulator in the destination buffer. */
746  *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
747 
748  /* Initialization of inputB pointer */
749  pIn2 = py;
750 
751  pScratch1 += 1u;
752 
753  }
754 
755  /* set status as ARM_MATH_SUCCESS */
756  status = ARM_MATH_SUCCESS;
757 
758  }
759 
760  /* Return to application */
761  return (status);
762 }
763 
764 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
765 
void arm_copy_q15(q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Copies the elements of a Q15 vector.
Definition: arm_copy_q15.c:60
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
#define _SIMD32_OFFSET(addr)
Definition: arm_math.h:447
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397
arm_status arm_conv_partial_fast_opt_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex, uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2)
Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize)
Fills a constant value into a Q15 vector.
Definition: arm_fill_q15.c:61
arm_status
Error status returned by some functions in the library.
Definition: arm_math.h:373