STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_conv_partial_opt_q15.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_conv_partial_opt_q15.c
9 *
10 * Description: Partial convolution of Q15 sequences.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
74 #ifndef UNALIGNED_SUPPORT_DISABLE
75 
77  q15_t * pSrcA,
78  uint32_t srcALen,
79  q15_t * pSrcB,
80  uint32_t srcBLen,
81  q15_t * pDst,
82  uint32_t firstIndex,
83  uint32_t numPoints,
84  q15_t * pScratch1,
85  q15_t * pScratch2)
86 {
87 
88  q15_t *pOut = pDst; /* output pointer */
89  q15_t *pScr1 = pScratch1; /* Temporary pointer for scratch1 */
90  q15_t *pScr2 = pScratch2; /* Temporary pointer for scratch2 */
91  q63_t acc0, acc1, acc2, acc3; /* Accumulator */
92  q31_t x1, x2, x3; /* Temporary variables to hold state and coefficient values */
93  q31_t y1, y2; /* State variables */
94  q15_t *pIn1; /* inputA pointer */
95  q15_t *pIn2; /* inputB pointer */
96  q15_t *px; /* Intermediate inputA pointer */
97  q15_t *py; /* Intermediate inputB pointer */
98  uint32_t j, k, blkCnt; /* loop counter */
99  arm_status status; /* Status variable */
100  uint32_t tapCnt; /* loop count */
101 
102  /* Check for range of output samples to be calculated */
103  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
104  {
105  /* Set status as ARM_MATH_ARGUMENT_ERROR */
106  status = ARM_MATH_ARGUMENT_ERROR;
107  }
108  else
109  {
110 
111  /* The algorithm implementation is based on the lengths of the inputs. */
112  /* srcB is always made to slide across srcA. */
113  /* So srcBLen is always considered as shorter or equal to srcALen */
114  if(srcALen >= srcBLen)
115  {
116  /* Initialization of inputA pointer */
117  pIn1 = pSrcA;
118 
119  /* Initialization of inputB pointer */
120  pIn2 = pSrcB;
121  }
122  else
123  {
124  /* Initialization of inputA pointer */
125  pIn1 = pSrcB;
126 
127  /* Initialization of inputB pointer */
128  pIn2 = pSrcA;
129 
130  /* srcBLen is always considered as shorter or equal to srcALen */
131  j = srcBLen;
132  srcBLen = srcALen;
133  srcALen = j;
134  }
135 
136  /* Temporary pointer for scratch2 */
137  py = pScratch2;
138 
139  /* pointer to take end of scratch2 buffer */
140  pScr2 = pScratch2 + srcBLen - 1;
141 
142  /* points to smaller length sequence */
143  px = pIn2;
144 
145  /* Apply loop unrolling and do 4 Copies simultaneously. */
146  k = srcBLen >> 2u;
147 
148  /* First part of the processing with loop unrolling copies 4 data points at a time.
149  ** a second loop below copies for the remaining 1 to 3 samples. */
150  while(k > 0u)
151  {
152  /* copy second buffer in reversal manner */
153  *pScr2-- = *px++;
154  *pScr2-- = *px++;
155  *pScr2-- = *px++;
156  *pScr2-- = *px++;
157 
158  /* Decrement the loop counter */
159  k--;
160  }
161 
162  /* If the count is not a multiple of 4, copy remaining samples here.
163  ** No loop unrolling is used. */
164  k = srcBLen % 0x4u;
165 
166  while(k > 0u)
167  {
168  /* copy second buffer in reversal manner for remaining samples */
169  *pScr2-- = *px++;
170 
171  /* Decrement the loop counter */
172  k--;
173  }
174 
175  /* Initialize temporary scratch pointer */
176  pScr1 = pScratch1;
177 
178  /* Fill (srcBLen - 1u) zeros in scratch buffer */
179  arm_fill_q15(0, pScr1, (srcBLen - 1u));
180 
181  /* Update temporary scratch pointer */
182  pScr1 += (srcBLen - 1u);
183 
184  /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
185 
186  /* Copy (srcALen) samples in scratch buffer */
187  arm_copy_q15(pIn1, pScr1, srcALen);
188 
189  /* Update pointers */
190  pScr1 += srcALen;
191 
192  /* Fill (srcBLen - 1u) zeros at end of scratch buffer */
193  arm_fill_q15(0, pScr1, (srcBLen - 1u));
194 
195  /* Update pointer */
196  pScr1 += (srcBLen - 1u);
197 
198  /* Initialization of pIn2 pointer */
199  pIn2 = py;
200 
201  pScratch1 += firstIndex;
202 
203  pOut = pDst + firstIndex;
204 
205  /* Actual convolution process starts here */
206  blkCnt = (numPoints) >> 2;
207 
208  while(blkCnt > 0)
209  {
210  /* Initialize temporary scratch pointer as scratch1 */
211  pScr1 = pScratch1;
212 
213  /* Clear accumulators */
214  acc0 = 0;
215  acc1 = 0;
216  acc2 = 0;
217  acc3 = 0;
218 
219  /* Read two samples from scratch1 buffer */
220  x1 = *__SIMD32(pScr1)++;
221 
222  /* Read next two samples from scratch1 buffer */
223  x2 = *__SIMD32(pScr1)++;
224 
225  tapCnt = (srcBLen) >> 2u;
226 
227  while(tapCnt > 0u)
228  {
229 
230  /* Read four samples from smaller buffer */
231  y1 = _SIMD32_OFFSET(pIn2);
232  y2 = _SIMD32_OFFSET(pIn2 + 2u);
233 
234  /* multiply and accumulate */
235  acc0 = __SMLALD(x1, y1, acc0);
236  acc2 = __SMLALD(x2, y1, acc2);
237 
238  /* pack input data */
239 #ifndef ARM_MATH_BIG_ENDIAN
240  x3 = __PKHBT(x2, x1, 0);
241 #else
242  x3 = __PKHBT(x1, x2, 0);
243 #endif
244 
245  /* multiply and accumulate */
246  acc1 = __SMLALDX(x3, y1, acc1);
247 
248  /* Read next two samples from scratch1 buffer */
249  x1 = _SIMD32_OFFSET(pScr1);
250 
251  /* multiply and accumulate */
252  acc0 = __SMLALD(x2, y2, acc0);
253  acc2 = __SMLALD(x1, y2, acc2);
254 
255  /* pack input data */
256 #ifndef ARM_MATH_BIG_ENDIAN
257  x3 = __PKHBT(x1, x2, 0);
258 #else
259  x3 = __PKHBT(x2, x1, 0);
260 #endif
261 
262  acc3 = __SMLALDX(x3, y1, acc3);
263  acc1 = __SMLALDX(x3, y2, acc1);
264 
265  x2 = _SIMD32_OFFSET(pScr1 + 2u);
266 
267 #ifndef ARM_MATH_BIG_ENDIAN
268  x3 = __PKHBT(x2, x1, 0);
269 #else
270  x3 = __PKHBT(x1, x2, 0);
271 #endif
272 
273  acc3 = __SMLALDX(x3, y2, acc3);
274 
275  /* update scratch pointers */
276  pIn2 += 4u;
277  pScr1 += 4u;
278 
279 
280  /* Decrement the loop counter */
281  tapCnt--;
282  }
283 
284  /* Update scratch pointer for remaining samples of smaller length sequence */
285  pScr1 -= 4u;
286 
287  /* apply same above for remaining samples of smaller length sequence */
288  tapCnt = (srcBLen) & 3u;
289 
290  while(tapCnt > 0u)
291  {
292  /* accumulate the results */
293  acc0 += (*pScr1++ * *pIn2);
294  acc1 += (*pScr1++ * *pIn2);
295  acc2 += (*pScr1++ * *pIn2);
296  acc3 += (*pScr1++ * *pIn2++);
297 
298  pScr1 -= 3u;
299 
300  /* Decrement the loop counter */
301  tapCnt--;
302  }
303 
304  blkCnt--;
305 
306 
307  /* Store the results in the accumulators in the destination buffer. */
308 
309 #ifndef ARM_MATH_BIG_ENDIAN
310 
311  *__SIMD32(pOut)++ =
312  __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
313  *__SIMD32(pOut)++ =
314  __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
315 
316 #else
317 
318  *__SIMD32(pOut)++ =
319  __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
320  *__SIMD32(pOut)++ =
321  __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
322 
323 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
324 
325  /* Initialization of inputB pointer */
326  pIn2 = py;
327 
328  pScratch1 += 4u;
329 
330  }
331 
332 
333  blkCnt = numPoints & 0x3;
334 
335  /* Calculate the remaining (numPoints & 3) output samples one at a time */
336  while(blkCnt > 0)
337  {
338  /* Initialize temporary scratch pointer as scratch1 */
339  pScr1 = pScratch1;
340 
341  /* Clear accumulator */
342  acc0 = 0;
343 
344  tapCnt = (srcBLen) >> 1u;
345 
346  while(tapCnt > 0u)
347  {
348 
349  /* Read next two samples from scratch1 buffer */
350  x1 = *__SIMD32(pScr1)++;
351 
352  /* Read two samples from smaller buffer */
353  y1 = *__SIMD32(pIn2)++;
354 
355  acc0 = __SMLALD(x1, y1, acc0);
356 
357  /* Decrement the loop counter */
358  tapCnt--;
359  }
360 
361  tapCnt = (srcBLen) & 1u;
362 
363  /* apply same above for remaining samples of smaller length sequence */
364  while(tapCnt > 0u)
365  {
366 
367  /* accumulate the results */
368  acc0 += (*pScr1++ * *pIn2++);
369 
370  /* Decrement the loop counter */
371  tapCnt--;
372  }
373 
374  blkCnt--;
375 
376  /* Store the result in the accumulator in the destination buffer. */
377  *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
378 
379  /* Initialization of inputB pointer */
380  pIn2 = py;
381 
382  pScratch1 += 1u;
383 
384  }
385 
386  /* set status as ARM_MATH_SUCCESS */
387  status = ARM_MATH_SUCCESS;
388 
389  }
390 
391  /* Return to application */
392  return (status);
393 }
394 
395 #else
396 
398  q15_t * pSrcA,
399  uint32_t srcALen,
400  q15_t * pSrcB,
401  uint32_t srcBLen,
402  q15_t * pDst,
403  uint32_t firstIndex,
404  uint32_t numPoints,
405  q15_t * pScratch1,
406  q15_t * pScratch2)
407 {
408 
409  q15_t *pOut = pDst; /* output pointer */
410  q15_t *pScr1 = pScratch1; /* Temporary pointer for scratch1 */
411  q15_t *pScr2 = pScratch2; /* Temporary pointer for scratch2 */
412  q63_t acc0, acc1, acc2, acc3; /* Accumulator */
413  q15_t *pIn1; /* inputA pointer */
414  q15_t *pIn2; /* inputB pointer */
415  q15_t *px; /* Intermediate inputA pointer */
416  q15_t *py; /* Intermediate inputB pointer */
417  uint32_t j, k, blkCnt; /* loop counter */
418  arm_status status; /* Status variable */
419  uint32_t tapCnt; /* loop count */
420  q15_t x10, x11, x20, x21; /* Temporary variables to hold srcA buffer */
421  q15_t y10, y11; /* Temporary variables to hold srcB buffer */
422 
423 
424  /* Check for range of output samples to be calculated */
425  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
426  {
427  /* Set status as ARM_MATH_ARGUMENT_ERROR */
428  status = ARM_MATH_ARGUMENT_ERROR;
429  }
430  else
431  {
432 
433  /* The algorithm implementation is based on the lengths of the inputs. */
434  /* srcB is always made to slide across srcA. */
435  /* So srcBLen is always considered as shorter or equal to srcALen */
436  if(srcALen >= srcBLen)
437  {
438  /* Initialization of inputA pointer */
439  pIn1 = pSrcA;
440 
441  /* Initialization of inputB pointer */
442  pIn2 = pSrcB;
443  }
444  else
445  {
446  /* Initialization of inputA pointer */
447  pIn1 = pSrcB;
448 
449  /* Initialization of inputB pointer */
450  pIn2 = pSrcA;
451 
452  /* srcBLen is always considered as shorter or equal to srcALen */
453  j = srcBLen;
454  srcBLen = srcALen;
455  srcALen = j;
456  }
457 
458  /* Temporary pointer for scratch2 */
459  py = pScratch2;
460 
461  /* pointer to take end of scratch2 buffer */
462  pScr2 = pScratch2 + srcBLen - 1;
463 
464  /* points to smaller length sequence */
465  px = pIn2;
466 
467  /* Apply loop unrolling and do 4 Copies simultaneously. */
468  k = srcBLen >> 2u;
469 
470  /* First part of the processing with loop unrolling copies 4 data points at a time.
471  ** a second loop below copies for the remaining 1 to 3 samples. */
472  while(k > 0u)
473  {
474  /* copy second buffer in reversal manner */
475  *pScr2-- = *px++;
476  *pScr2-- = *px++;
477  *pScr2-- = *px++;
478  *pScr2-- = *px++;
479 
480  /* Decrement the loop counter */
481  k--;
482  }
483 
484  /* If the count is not a multiple of 4, copy remaining samples here.
485  ** No loop unrolling is used. */
486  k = srcBLen % 0x4u;
487 
488  while(k > 0u)
489  {
490  /* copy second buffer in reversal manner for remaining samples */
491  *pScr2-- = *px++;
492 
493  /* Decrement the loop counter */
494  k--;
495  }
496 
497  /* Initialize temporary scratch pointer */
498  pScr1 = pScratch1;
499 
500  /* Fill (srcBLen - 1u) zeros in scratch buffer */
501  arm_fill_q15(0, pScr1, (srcBLen - 1u));
502 
503  /* Update temporary scratch pointer */
504  pScr1 += (srcBLen - 1u);
505 
506  /* Copy bigger length sequence(srcALen) samples in scratch1 buffer */
507 
508 
509  /* Apply loop unrolling and do 4 Copies simultaneously. */
510  k = srcALen >> 2u;
511 
512  /* First part of the processing with loop unrolling copies 4 data points at a time.
513  ** a second loop below copies for the remaining 1 to 3 samples. */
514  while(k > 0u)
515  {
516  /* copy the longer sequence into scratch1 in forward order */
517  *pScr1++ = *pIn1++;
518  *pScr1++ = *pIn1++;
519  *pScr1++ = *pIn1++;
520  *pScr1++ = *pIn1++;
521 
522  /* Decrement the loop counter */
523  k--;
524  }
525 
526  /* If the count is not a multiple of 4, copy remaining samples here.
527  ** No loop unrolling is used. */
528  k = srcALen % 0x4u;
529 
530  while(k > 0u)
531  {
532  /* copy the remaining samples of the longer sequence in forward order */
533  *pScr1++ = *pIn1++;
534 
535  /* Decrement the loop counter */
536  k--;
537  }
538 
539 
540  /* Apply loop unrolling and write 4 zeros per iteration. */
541  k = (srcBLen - 1u) >> 2u;
542 
543  /* First part of the zero padding with loop unrolling writes 4 zeros at a time.
544  ** a second loop below writes the remaining 1 to 3 zeros. */
545  while(k > 0u)
546  {
547  /* append zeros after the copied samples in scratch1 */
548  *pScr1++ = 0;
549  *pScr1++ = 0;
550  *pScr1++ = 0;
551  *pScr1++ = 0;
552 
553  /* Decrement the loop counter */
554  k--;
555  }
556 
557  /* If the count is not a multiple of 4, write the remaining zeros here.
558  ** No loop unrolling is used. */
559  k = (srcBLen - 1u) % 0x4u;
560 
561  while(k > 0u)
562  {
563  /* append the remaining zeros */
564  *pScr1++ = 0;
565 
566  /* Decrement the loop counter */
567  k--;
568  }
569 
570 
571  /* Initialization of pIn2 pointer */
572  pIn2 = py;
573 
574  pScratch1 += firstIndex;
575 
576  pOut = pDst + firstIndex;
577 
578  /* Actual convolution process starts here */
579  blkCnt = (numPoints) >> 2;
580 
581  while(blkCnt > 0)
582  {
583  /* Initialize temporary scratch pointer as scratch1 */
584  pScr1 = pScratch1;
585 
586  /* Clear accumulators */
587  acc0 = 0;
588  acc1 = 0;
589  acc2 = 0;
590  acc3 = 0;
591 
592  /* Read two samples from scratch1 buffer */
593  x10 = *pScr1++;
594  x11 = *pScr1++;
595 
596  /* Read next two samples from scratch1 buffer */
597  x20 = *pScr1++;
598  x21 = *pScr1++;
599 
600  tapCnt = (srcBLen) >> 2u;
601 
602  while(tapCnt > 0u)
603  {
604 
605  /* Read two samples from smaller buffer */
606  y10 = *pIn2;
607  y11 = *(pIn2 + 1u);
608 
609  /* multiply and accumulate */
610  acc0 += (q63_t) x10 *y10;
611  acc0 += (q63_t) x11 *y11;
612  acc2 += (q63_t) x20 *y10;
613  acc2 += (q63_t) x21 *y11;
614 
615  /* multiply and accumulate */
616  acc1 += (q63_t) x11 *y10;
617  acc1 += (q63_t) x20 *y11;
618 
619  /* Read next two samples from scratch1 buffer */
620  x10 = *pScr1;
621  x11 = *(pScr1 + 1u);
622 
623  /* multiply and accumulate */
624  acc3 += (q63_t) x21 *y10;
625  acc3 += (q63_t) x10 *y11;
626 
627  /* Read next two samples from scratch2 buffer */
628  y10 = *(pIn2 + 2u);
629  y11 = *(pIn2 + 3u);
630 
631  /* multiply and accumulate */
632  acc0 += (q63_t) x20 *y10;
633  acc0 += (q63_t) x21 *y11;
634  acc2 += (q63_t) x10 *y10;
635  acc2 += (q63_t) x11 *y11;
636  acc1 += (q63_t) x21 *y10;
637  acc1 += (q63_t) x10 *y11;
638 
639  /* Read next two samples from scratch1 buffer */
640  x20 = *(pScr1 + 2);
641  x21 = *(pScr1 + 3);
642 
643  /* multiply and accumulate */
644  acc3 += (q63_t) x11 *y10;
645  acc3 += (q63_t) x20 *y11;
646 
647  /* update scratch pointers */
648  pIn2 += 4u;
649  pScr1 += 4u;
650 
651  /* Decrement the loop counter */
652  tapCnt--;
653  }
654 
655  /* Update scratch pointer for remaining samples of smaller length sequence */
656  pScr1 -= 4u;
657 
658  /* apply same above for remaining samples of smaller length sequence */
659  tapCnt = (srcBLen) & 3u;
660 
661  while(tapCnt > 0u)
662  {
663  /* accumulate the results */
664  acc0 += (*pScr1++ * *pIn2);
665  acc1 += (*pScr1++ * *pIn2);
666  acc2 += (*pScr1++ * *pIn2);
667  acc3 += (*pScr1++ * *pIn2++);
668 
669  pScr1 -= 3u;
670 
671  /* Decrement the loop counter */
672  tapCnt--;
673  }
674 
675  blkCnt--;
676 
677 
678  /* Store the results in the accumulators in the destination buffer. */
679  *pOut++ = __SSAT((acc0 >> 15), 16);
680  *pOut++ = __SSAT((acc1 >> 15), 16);
681  *pOut++ = __SSAT((acc2 >> 15), 16);
682  *pOut++ = __SSAT((acc3 >> 15), 16);
683 
684 
685  /* Initialization of inputB pointer */
686  pIn2 = py;
687 
688  pScratch1 += 4u;
689 
690  }
691 
692 
693  blkCnt = numPoints & 0x3;
694 
695  /* Calculate the remaining (numPoints & 3) output samples one at a time */
696  while(blkCnt > 0)
697  {
698  /* Initialize temporary scratch pointer as scratch1 */
699  pScr1 = pScratch1;
700 
701  /* Clear accumulator */
702  acc0 = 0;
703 
704  tapCnt = (srcBLen) >> 1u;
705 
706  while(tapCnt > 0u)
707  {
708 
709  /* Read next two samples from scratch1 buffer */
710  x10 = *pScr1++;
711  x11 = *pScr1++;
712 
713  /* Read two samples from smaller buffer */
714  y10 = *pIn2++;
715  y11 = *pIn2++;
716 
717  /* multiply and accumulate */
718  acc0 += (q63_t) x10 *y10;
719  acc0 += (q63_t) x11 *y11;
720 
721  /* Decrement the loop counter */
722  tapCnt--;
723  }
724 
725  tapCnt = (srcBLen) & 1u;
726 
727  /* apply same above for remaining samples of smaller length sequence */
728  while(tapCnt > 0u)
729  {
730 
731  /* accumulate the results */
732  acc0 += (*pScr1++ * *pIn2++);
733 
734  /* Decrement the loop counter */
735  tapCnt--;
736  }
737 
738  blkCnt--;
739 
740  /* Store the result in the accumulator in the destination buffer. */
741  *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
742 
743 
744  /* Initialization of inputB pointer */
745  pIn2 = py;
746 
747  pScratch1 += 1u;
748 
749  }
750 
751  /* set status as ARM_MATH_SUCCESS */
752  status = ARM_MATH_SUCCESS;
753 
754  }
755 
756  /* Return to application */
757  return (status);
758 }
759 
760 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
761 
762 
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
arm_status arm_conv_partial_opt_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex, uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2)
Partial convolution of Q15 sequences.
void arm_copy_q15(q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Copies the elements of a Q15 vector.
Definition: arm_copy_q15.c:60
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
#define _SIMD32_OFFSET(addr)
Definition: arm_math.h:447
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397
void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize)
Fills a constant value into a Q15 vector.
Definition: arm_fill_q15.c:61
arm_status
Error status returned by some functions in the library.
Definition: arm_math.h:373