STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_conv_partial_opt_q7.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_conv_partial_opt_q7.c
9 *
10 * Description: Partial convolution of Q7 sequences.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
74 #ifndef UNALIGNED_SUPPORT_DISABLE
75 
77  q7_t * pSrcA,
78  uint32_t srcALen,
79  q7_t * pSrcB,
80  uint32_t srcBLen,
81  q7_t * pDst,
82  uint32_t firstIndex,
83  uint32_t numPoints,
84  q15_t * pScratch1,
85  q15_t * pScratch2)
86 {
87 
88  q15_t *pScr2, *pScr1; /* Intermediate pointers for scratch pointers */
89  q15_t x4; /* Temporary input variable */
90  q7_t *pIn1, *pIn2; /* inputA and inputB pointer */
91  uint32_t j, k, blkCnt, tapCnt; /* loop counter */
92  q7_t *px; /* Temporary input1 pointer */
93  q15_t *py; /* Temporary input2 pointer */
94  q31_t acc0, acc1, acc2, acc3; /* Accumulator */
95  q31_t x1, x2, x3, y1; /* Temporary input variables */
96  arm_status status;
97  q7_t *pOut = pDst; /* output pointer */
98  q7_t out0, out1, out2, out3; /* temporary variables */
99 
100  /* Check for range of output samples to be calculated */
101  if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
102  {
103  /* Set status as ARM_MATH_ARGUMENT_ERROR */
104  status = ARM_MATH_ARGUMENT_ERROR;
105  }
106  else
107  {
108 
109  /* The algorithm implementation is based on the lengths of the inputs. */
110  /* srcB is always made to slide across srcA. */
111  /* So srcBLen is always considered as shorter or equal to srcALen */
112  if(srcALen >= srcBLen)
113  {
114  /* Initialization of inputA pointer */
115  pIn1 = pSrcA;
116 
117  /* Initialization of inputB pointer */
118  pIn2 = pSrcB;
119  }
120  else
121  {
122  /* Initialization of inputA pointer */
123  pIn1 = pSrcB;
124 
125  /* Initialization of inputB pointer */
126  pIn2 = pSrcA;
127 
128  /* srcBLen is always considered as shorter or equal to srcALen */
129  j = srcBLen;
130  srcBLen = srcALen;
131  srcALen = j;
132  }
133 
134  /* pointer to take end of scratch2 buffer */
135  pScr2 = pScratch2;
136 
137  /* points to smaller length sequence */
138  px = pIn2 + srcBLen - 1;
139 
140  /* Apply loop unrolling and do 4 Copies simultaneously. */
141  k = srcBLen >> 2u;
142 
143  /* First part of the processing with loop unrolling copies 4 data points at a time.
144  ** a second loop below copies for the remaining 1 to 3 samples. */
145  while(k > 0u)
146  {
147  /* copy second buffer in reversal manner */
148  x4 = (q15_t) * px--;
149  *pScr2++ = x4;
150  x4 = (q15_t) * px--;
151  *pScr2++ = x4;
152  x4 = (q15_t) * px--;
153  *pScr2++ = x4;
154  x4 = (q15_t) * px--;
155  *pScr2++ = x4;
156 
157  /* Decrement the loop counter */
158  k--;
159  }
160 
161  /* If the count is not a multiple of 4, copy remaining samples here.
162  ** No loop unrolling is used. */
163  k = srcBLen % 0x4u;
164 
165  while(k > 0u)
166  {
167  /* copy second buffer in reversal manner for remaining samples */
168  x4 = (q15_t) * px--;
169  *pScr2++ = x4;
170 
171  /* Decrement the loop counter */
172  k--;
173  }
174 
175  /* Initialze temporary scratch pointer */
176  pScr1 = pScratch1;
177 
178  /* Fill (srcBLen - 1u) zeros in scratch buffer */
179  arm_fill_q15(0, pScr1, (srcBLen - 1u));
180 
181  /* Update temporary scratch pointer */
182  pScr1 += (srcBLen - 1u);
183 
184  /* Copy (srcALen) samples in scratch buffer */
185  /* Apply loop unrolling and do 4 Copies simultaneously. */
186  k = srcALen >> 2u;
187 
188  /* First part of the processing with loop unrolling copies 4 data points at a time.
189  ** a second loop below copies for the remaining 1 to 3 samples. */
190  while(k > 0u)
191  {
192  /* copy second buffer in reversal manner */
193  x4 = (q15_t) * pIn1++;
194  *pScr1++ = x4;
195  x4 = (q15_t) * pIn1++;
196  *pScr1++ = x4;
197  x4 = (q15_t) * pIn1++;
198  *pScr1++ = x4;
199  x4 = (q15_t) * pIn1++;
200  *pScr1++ = x4;
201 
202  /* Decrement the loop counter */
203  k--;
204  }
205 
206  /* If the count is not a multiple of 4, copy remaining samples here.
207  ** No loop unrolling is used. */
208  k = srcALen % 0x4u;
209 
210  while(k > 0u)
211  {
212  /* copy second buffer in reversal manner for remaining samples */
213  x4 = (q15_t) * pIn1++;
214  *pScr1++ = x4;
215 
216  /* Decrement the loop counter */
217  k--;
218  }
219 
220  /* Fill (srcBLen - 1u) zeros at end of scratch buffer */
221  arm_fill_q15(0, pScr1, (srcBLen - 1u));
222 
223  /* Update pointer */
224  pScr1 += (srcBLen - 1u);
225 
226 
227  /* Temporary pointer for scratch2 */
228  py = pScratch2;
229 
230  /* Initialization of pIn2 pointer */
231  pIn2 = (q7_t *) py;
232 
233  pScr2 = py;
234 
235  pOut = pDst + firstIndex;
236 
237  pScratch1 += firstIndex;
238 
239  /* Actual convolution process starts here */
240  blkCnt = (numPoints) >> 2;
241 
242 
243  while(blkCnt > 0)
244  {
245  /* Initialze temporary scratch pointer as scratch1 */
246  pScr1 = pScratch1;
247 
248  /* Clear Accumlators */
249  acc0 = 0;
250  acc1 = 0;
251  acc2 = 0;
252  acc3 = 0;
253 
254  /* Read two samples from scratch1 buffer */
255  x1 = *__SIMD32(pScr1)++;
256 
257  /* Read next two samples from scratch1 buffer */
258  x2 = *__SIMD32(pScr1)++;
259 
260  tapCnt = (srcBLen) >> 2u;
261 
262  while(tapCnt > 0u)
263  {
264 
265  /* Read four samples from smaller buffer */
266  y1 = _SIMD32_OFFSET(pScr2);
267 
268  /* multiply and accumlate */
269  acc0 = __SMLAD(x1, y1, acc0);
270  acc2 = __SMLAD(x2, y1, acc2);
271 
272  /* pack input data */
273 #ifndef ARM_MATH_BIG_ENDIAN
274  x3 = __PKHBT(x2, x1, 0);
275 #else
276  x3 = __PKHBT(x1, x2, 0);
277 #endif
278 
279  /* multiply and accumlate */
280  acc1 = __SMLADX(x3, y1, acc1);
281 
282  /* Read next two samples from scratch1 buffer */
283  x1 = *__SIMD32(pScr1)++;
284 
285  /* pack input data */
286 #ifndef ARM_MATH_BIG_ENDIAN
287  x3 = __PKHBT(x1, x2, 0);
288 #else
289  x3 = __PKHBT(x2, x1, 0);
290 #endif
291 
292  acc3 = __SMLADX(x3, y1, acc3);
293 
294  /* Read four samples from smaller buffer */
295  y1 = _SIMD32_OFFSET(pScr2 + 2u);
296 
297  acc0 = __SMLAD(x2, y1, acc0);
298 
299  acc2 = __SMLAD(x1, y1, acc2);
300 
301  acc1 = __SMLADX(x3, y1, acc1);
302 
303  x2 = *__SIMD32(pScr1)++;
304 
305 #ifndef ARM_MATH_BIG_ENDIAN
306  x3 = __PKHBT(x2, x1, 0);
307 #else
308  x3 = __PKHBT(x1, x2, 0);
309 #endif
310 
311  acc3 = __SMLADX(x3, y1, acc3);
312 
313  pScr2 += 4u;
314 
315 
316  /* Decrement the loop counter */
317  tapCnt--;
318  }
319 
320 
321 
322  /* Update scratch pointer for remaining samples of smaller length sequence */
323  pScr1 -= 4u;
324 
325 
326  /* apply same above for remaining samples of smaller length sequence */
327  tapCnt = (srcBLen) & 3u;
328 
329  while(tapCnt > 0u)
330  {
331 
332  /* accumlate the results */
333  acc0 += (*pScr1++ * *pScr2);
334  acc1 += (*pScr1++ * *pScr2);
335  acc2 += (*pScr1++ * *pScr2);
336  acc3 += (*pScr1++ * *pScr2++);
337 
338  pScr1 -= 3u;
339 
340  /* Decrement the loop counter */
341  tapCnt--;
342  }
343 
344  blkCnt--;
345 
346  /* Store the result in the accumulator in the destination buffer. */
347  out0 = (q7_t) (__SSAT(acc0 >> 7u, 8));
348  out1 = (q7_t) (__SSAT(acc1 >> 7u, 8));
349  out2 = (q7_t) (__SSAT(acc2 >> 7u, 8));
350  out3 = (q7_t) (__SSAT(acc3 >> 7u, 8));
351 
352  *__SIMD32(pOut)++ = __PACKq7(out0, out1, out2, out3);
353 
354  /* Initialization of inputB pointer */
355  pScr2 = py;
356 
357  pScratch1 += 4u;
358 
359  }
360 
361  blkCnt = (numPoints) & 0x3;
362 
363  /* Calculate convolution for remaining samples of Bigger length sequence */
364  while(blkCnt > 0)
365  {
366  /* Initialze temporary scratch pointer as scratch1 */
367  pScr1 = pScratch1;
368 
369  /* Clear Accumlators */
370  acc0 = 0;
371 
372  tapCnt = (srcBLen) >> 1u;
373 
374  while(tapCnt > 0u)
375  {
376 
377  /* Read next two samples from scratch1 buffer */
378  x1 = *__SIMD32(pScr1)++;
379 
380  /* Read two samples from smaller buffer */
381  y1 = *__SIMD32(pScr2)++;
382 
383  acc0 = __SMLAD(x1, y1, acc0);
384 
385  /* Decrement the loop counter */
386  tapCnt--;
387  }
388 
389  tapCnt = (srcBLen) & 1u;
390 
391  /* apply same above for remaining samples of smaller length sequence */
392  while(tapCnt > 0u)
393  {
394 
395  /* accumlate the results */
396  acc0 += (*pScr1++ * *pScr2++);
397 
398  /* Decrement the loop counter */
399  tapCnt--;
400  }
401 
402  blkCnt--;
403 
404  /* Store the result in the accumulator in the destination buffer. */
405  *pOut++ = (q7_t) (__SSAT(acc0 >> 7u, 8));
406 
407  /* Initialization of inputB pointer */
408  pScr2 = py;
409 
410  pScratch1 += 1u;
411 
412  }
413 
414  /* set status as ARM_MATH_SUCCESS */
415  status = ARM_MATH_SUCCESS;
416 
417 
418  }
419 
420  return (status);
421 
422 }
423 
424 #else
425 
/**
 * Partial convolution of Q7 sequences. This variant is compiled when
 * UNALIGNED_SUPPORT_DISABLE is defined and reads/writes one sample at a time.
 *
 * The shorter input is copied in reverse order, widened to q15, into
 * pScratch2. The longer input is copied in forward order, widened to q15,
 * into pScratch1 with (shorter length - 1) zeros before and after it.
 * Output sample n is the dot product of the reversed shorter sequence with
 * the scratch1 window starting at element n, shifted right by 7 bits and
 * saturated to 8 bits.
 *
 * pSrcA, srcALen  first input sequence and its length
 * pSrcB, srcBLen  second input sequence and its length
 * pDst            output buffer; results are written to
 *                 pDst[firstIndex] .. pDst[firstIndex + numPoints - 1]
 * firstIndex      index of the first output sample to compute
 * numPoints       number of output samples to compute
 * pScratch1       q15 work buffer; this function writes
 *                 longer + 2 * (shorter - 1) elements into it
 * pScratch2       q15 work buffer; this function writes `shorter` elements
 *
 * Returns ARM_MATH_ARGUMENT_ERROR when either input is empty or when the
 * requested range does not fit inside the srcALen + srcBLen - 1 output
 * samples; returns ARM_MATH_SUCCESS otherwise.
 *
 * NOTE(review): when the shorter length is a multiple of 4, the last
 * 4-output block appears to load one q15 element past the elements written
 * to pScratch1 (the loaded value is not multiplied) -- confirm the caller's
 * buffer sizing tolerates this.
 */
426 arm_status arm_conv_partial_opt_q7(
427  q7_t * pSrcA,
428  uint32_t srcALen,
429  q7_t * pSrcB,
430  uint32_t srcBLen,
431  q7_t * pDst,
432  uint32_t firstIndex,
433  uint32_t numPoints,
434  q15_t * pScratch1,
435  q15_t * pScratch2)
436 {
437 
438  q15_t *pScr2, *pScr1; /* Working pointers into scratch2 / scratch1 */
439  q15_t x4; /* One input sample widened from q7 to q15 */
440  q7_t *pIn1, *pIn2; /* Longer / shorter input sequence */
441  uint32_t j, k, blkCnt, tapCnt; /* Swap temporary and loop counters */
442  q7_t *px; /* Read pointer walking the shorter sequence backwards */
443  q15_t *py; /* Start of the reversed shorter sequence in scratch2 */
444  q31_t acc0, acc1, acc2, acc3; /* Accumulators for four adjacent outputs */
445  arm_status status;
446  q7_t *pOut = pDst; /* output pointer */
447  q15_t x10, x11, x20, x21; /* Four consecutive scratch1 samples */
448  q15_t y10, y11; /* Two consecutive taps from scratch2 */
449 
450  /* Reject empty inputs (srcBLen - 1u would wrap) and output ranges outside the
  * srcALen + srcBLen - 1 valid samples. The range test uses a subtraction so
  * that firstIndex + numPoints cannot wrap around and slip past the check. */
451  if((srcALen == 0u) || (srcBLen == 0u) || (numPoints > (srcALen + (srcBLen - 1u))) || (firstIndex > ((srcALen + (srcBLen - 1u)) - numPoints)))
452  {
453  /* Set status as ARM_MATH_ARGUMENT_ERROR */
454  status = ARM_MATH_ARGUMENT_ERROR;
455  }
456  else
457  {
458 
459  /* The algorithm implementation is based on the lengths of the inputs. */
460  /* srcB is always made to slide across srcA. */
461  /* So srcBLen is always considered as shorter or equal to srcALen */
462  if(srcALen >= srcBLen)
463  {
464  /* Initialization of inputA pointer */
465  pIn1 = pSrcA;
466 
467  /* Initialization of inputB pointer */
468  pIn2 = pSrcB;
469  }
470  else
471  {
472  /* Initialization of inputA pointer */
473  pIn1 = pSrcB;
474 
475  /* Initialization of inputB pointer */
476  pIn2 = pSrcA;
477 
478  /* Swap the lengths so that srcBLen is the shorter one from here on */
479  j = srcBLen;
480  srcBLen = srcALen;
481  srcALen = j;
482  }
483 
484  /* Write pointer into scratch2, starting at its first element */
485  pScr2 = pScratch2;
486 
487  /* Point at the last sample of the shorter sequence */
488  px = pIn2 + srcBLen - 1;
489 
490  /* Apply loop unrolling and do 4 Copies simultaneously. */
491  k = srcBLen >> 2u;
492 
493  /* First part of the processing with loop unrolling copies 4 data points at a time.
494  ** a second loop below copies for the remaining 1 to 3 samples. */
495  while(k > 0u)
496  {
497  /* Copy the shorter sequence in reverse order, widening q7 to q15 */
498  x4 = (q15_t) * px--;
499  *pScr2++ = x4;
500  x4 = (q15_t) * px--;
501  *pScr2++ = x4;
502  x4 = (q15_t) * px--;
503  *pScr2++ = x4;
504  x4 = (q15_t) * px--;
505  *pScr2++ = x4;
506 
507  /* Decrement the loop counter */
508  k--;
509  }
510 
511  /* If the count is not a multiple of 4, copy remaining samples here.
512  ** No loop unrolling is used. */
513  k = srcBLen % 0x4u;
514 
515  while(k > 0u)
516  {
517  /* Reverse-copy the remaining samples of the shorter sequence */
518  x4 = (q15_t) * px--;
519  *pScr2++ = x4;
520 
521  /* Decrement the loop counter */
522  k--;
523  }
524 
525  /* Initialize temporary scratch pointer */
526  pScr1 = pScratch1;
527 
528  /* Fill (srcBLen - 1u) zeros in scratch buffer */
529  arm_fill_q15(0, pScr1, (srcBLen - 1u));
530 
531  /* Update temporary scratch pointer */
532  pScr1 += (srcBLen - 1u);
533 
534  /* Copy (srcALen) samples in scratch buffer */
535  /* Apply loop unrolling and do 4 Copies simultaneously. */
536  k = srcALen >> 2u;
537 
538  /* First part of the processing with loop unrolling copies 4 data points at a time.
539  ** a second loop below copies for the remaining 1 to 3 samples. */
540  while(k > 0u)
541  {
542  /* Copy the longer sequence in forward order, widening q7 to q15 */
543  x4 = (q15_t) * pIn1++;
544  *pScr1++ = x4;
545  x4 = (q15_t) * pIn1++;
546  *pScr1++ = x4;
547  x4 = (q15_t) * pIn1++;
548  *pScr1++ = x4;
549  x4 = (q15_t) * pIn1++;
550  *pScr1++ = x4;
551 
552  /* Decrement the loop counter */
553  k--;
554  }
555 
556  /* If the count is not a multiple of 4, copy remaining samples here.
557  ** No loop unrolling is used. */
558  k = srcALen % 0x4u;
559 
560  while(k > 0u)
561  {
562  /* Forward-copy the remaining samples of the longer sequence */
563  x4 = (q15_t) * pIn1++;
564  *pScr1++ = x4;
565 
566  /* Decrement the loop counter */
567  k--;
568  }
569 
570  /* Append (srcBLen - 1u) zeros after the copied samples, four at a time. */
571  k = (srcBLen - 1u) >> 2u;
572 
573  /* First part of the processing with loop unrolling writes 4 zeros at a time.
574  ** a second loop below writes the remaining 1 to 3 zeros. */
575  while(k > 0u)
576  {
577  /* Zero-pad the tail of scratch1 */
578  *pScr1++ = 0;
579  *pScr1++ = 0;
580  *pScr1++ = 0;
581  *pScr1++ = 0;
582 
583  /* Decrement the loop counter */
584  k--;
585  }
586 
587  /* If the count is not a multiple of 4, write the remaining zeros here.
588  ** No loop unrolling is used. */
589  k = (srcBLen - 1u) % 0x4u;
590 
591  while(k > 0u)
592  {
593  /* Zero-pad the remaining tail elements */
594  *pScr1++ = 0;
595 
596  /* Decrement the loop counter */
597  k--;
598  }
599 
600 
601  /* Remember where the reversed shorter sequence starts in scratch2 */
602  py = pScratch2;
603 
604  /* pIn2 is not read again after this assignment */
605  pIn2 = (q7_t *) py;
606 
607  pScr2 = py;
608 
609  /* Results are stored starting at the requested first output index */
610  pOut = pDst + firstIndex;
611 
612  /* The scratch1 window for output n starts at element n */
613  pScratch1 += firstIndex;
614 
615  /* Actual convolution process starts here: four outputs per iteration */
616  blkCnt = (numPoints) >> 2;
617 
618 
619  while(blkCnt > 0)
620  {
621  /* Initialize temporary scratch pointer as scratch1 */
622  pScr1 = pScratch1;
623 
624  /* Clear accumulators */
625  acc0 = 0;
626  acc1 = 0;
627  acc2 = 0;
628  acc3 = 0;
629 
630  /* Read two samples from scratch1 buffer */
631  x10 = *pScr1++;
632  x11 = *pScr1++;
633 
634  /* Read next two samples from scratch1 buffer */
635  x20 = *pScr1++;
636  x21 = *pScr1++;
637 
638  /* Taps are consumed four at a time in this loop */
639  tapCnt = (srcBLen) >> 2u;
640 
641  while(tapCnt > 0u)
642  {
643 
644  /* Read the first two taps of this group from the reversed shorter buffer */
645  y10 = *pScr2;
646  y11 = *(pScr2 + 1u);
647 
648  /* multiply and accumulate */
649  acc0 += (q31_t) x10 *y10;
650  acc0 += (q31_t) x11 *y11;
651  acc2 += (q31_t) x20 *y10;
652  acc2 += (q31_t) x21 *y11;
653 
654 
655  acc1 += (q31_t) x11 *y10;
656  acc1 += (q31_t) x20 *y11;
657 
658  /* Read next two samples from scratch1 buffer */
659  x10 = *pScr1;
660  x11 = *(pScr1 + 1u);
661 
662  /* multiply and accumulate */
663  acc3 += (q31_t) x21 *y10;
664  acc3 += (q31_t) x10 *y11;
665 
666  /* Read the last two taps of this group from scratch2 buffer */
667  y10 = *(pScr2 + 2u);
668  y11 = *(pScr2 + 3u);
669 
670  /* multiply and accumulate */
671  acc0 += (q31_t) x20 *y10;
672  acc0 += (q31_t) x21 *y11;
673  acc2 += (q31_t) x10 *y10;
674  acc2 += (q31_t) x11 *y11;
675  acc1 += (q31_t) x21 *y10;
676  acc1 += (q31_t) x10 *y11;
677 
678  /* Read next two samples from scratch1 buffer */
679  x20 = *(pScr1 + 2);
680  x21 = *(pScr1 + 3);
681 
682  /* multiply and accumulate */
683  acc3 += (q31_t) x11 *y10;
684  acc3 += (q31_t) x20 *y11;
685 
686  /* update scratch pointers */
687 
688  pScr1 += 4u;
689  pScr2 += 4u;
690 
691  /* Decrement the loop counter */
692  tapCnt--;
693  }
694 
695 
696 
697  /* Step back over the four samples that were read ahead, so the loop
  * below starts at the first sample not yet multiplied for acc0 */
698  pScr1 -= 4u;
699 
700 
701  /* Handle the remaining (srcBLen % 4) taps one at a time */
702  tapCnt = (srcBLen) & 3u;
703 
704  while(tapCnt > 0u)
705  {
706 
707  /* accumulate the results: one tap against four consecutive samples */
708  acc0 += (*pScr1++ * *pScr2);
709  acc1 += (*pScr1++ * *pScr2);
710  acc2 += (*pScr1++ * *pScr2);
711  acc3 += (*pScr1++ * *pScr2++);
712 
713  /* Net advance of one sample per tap */
714  pScr1 -= 3u;
715 
716  /* Decrement the loop counter */
717  tapCnt--;
718  }
719 
720  blkCnt--;
721 
722  /* Shift each accumulator right by 7, saturate to 8 bits and store it. */
723  *pOut++ = (q7_t) (__SSAT(acc0 >> 7u, 8));
724  *pOut++ = (q7_t) (__SSAT(acc1 >> 7u, 8));
725  *pOut++ = (q7_t) (__SSAT(acc2 >> 7u, 8));
726  *pOut++ = (q7_t) (__SSAT(acc3 >> 7u, 8));
727 
728  /* Rewind to the start of the reversed shorter sequence */
729  pScr2 = py;
730 
731  /* Move the scratch1 window on by the four outputs just produced */
732  pScratch1 += 4u;
733 
734  }
735 
736  blkCnt = (numPoints) & 0x3;
737 
738  /* Compute the remaining (numPoints % 4) output samples one at a time */
739  while(blkCnt > 0)
740  {
741  /* Initialize temporary scratch pointer as scratch1 */
742  pScr1 = pScratch1;
743 
744  /* Clear accumulator */
745  acc0 = 0;
746 
747  /* Taps are consumed two at a time in this loop */
748  tapCnt = (srcBLen) >> 1u;
749 
750  while(tapCnt > 0u)
751  {
752 
753  /* Read next two samples from scratch1 buffer */
754  x10 = *pScr1++;
755  x11 = *pScr1++;
756 
757  /* Read two samples from smaller buffer */
758  y10 = *pScr2++;
759  y11 = *pScr2++;
760 
761  /* multiply and accumulate */
762  acc0 += (q31_t) x10 *y10;
763  acc0 += (q31_t) x11 *y11;
764 
765  /* Decrement the loop counter */
766  tapCnt--;
767  }
768 
769  tapCnt = (srcBLen) & 1u;
770 
771  /* Handle the last tap when the shorter length is odd */
772  while(tapCnt > 0u)
773  {
774 
775  /* accumulate the results */
776  acc0 += (*pScr1++ * *pScr2++);
777 
778  /* Decrement the loop counter */
779  tapCnt--;
780  }
781 
782  blkCnt--;
783 
784  /* Shift right by 7, saturate to 8 bits and store in the destination buffer. */
785  *pOut++ = (q7_t) (__SSAT(acc0 >> 7u, 8));
786 
787  /* Rewind to the start of the reversed shorter sequence */
788  pScr2 = py;
789 
790  /* Move the scratch1 window on by one output */
791  pScratch1 += 1u;
792 
793  }
794 
795  /* set status as ARM_MATH_SUCCESS */
796  status = ARM_MATH_SUCCESS;
797 
798  }
799 
800  return (status);
801 
802 }
796 
797 #endif /* #ifndef UNALIGNED_SUPPORT_DISABLE */
798 
799 
800 
int8_t q7_t
8-bit fractional data type in 1.7 format.
Definition: arm_math.h:387
#define __PACKq7(v0, v1, v2, v3)
definition to pack four 8 bit values.
Definition: arm_math.h:467
int16_t q15_t
16-bit fractional data type in 1.15 format.
Definition: arm_math.h:392
#define __SIMD32(addr)
definition to read/write two 16 bit values.
Definition: arm_math.h:445
#define _SIMD32_OFFSET(addr)
Definition: arm_math.h:447
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397
arm_status arm_conv_partial_opt_q7(q7_t *pSrcA, uint32_t srcALen, q7_t *pSrcB, uint32_t srcBLen, q7_t *pDst, uint32_t firstIndex, uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2)
Partial convolution of Q7 sequences.
void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize)
Fills a constant value into a Q15 vector.
Definition: arm_fill_q15.c:61
arm_status
Error status returned by some functions in the library.
Definition: arm_math.h:373