74 #ifndef UNALIGNED_SUPPORT_DISABLE 91 uint32_t j, k, blkCnt, tapCnt;
94 q31_t acc0, acc1, acc2, acc3;
98 q7_t out0, out1, out2, out3;
101 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
112 if(srcALen >= srcBLen)
138 px = pIn2 + srcBLen - 1;
182 pScr1 += (srcBLen - 1u);
193 x4 = (
q15_t) * pIn1++;
195 x4 = (
q15_t) * pIn1++;
197 x4 = (
q15_t) * pIn1++;
199 x4 = (
q15_t) * pIn1++;
213 x4 = (
q15_t) * pIn1++;
224 pScr1 += (srcBLen - 1u);
235 pOut = pDst + firstIndex;
237 pScratch1 += firstIndex;
240 blkCnt = (numPoints) >> 2;
260 tapCnt = (srcBLen) >> 2u;
269 acc0 = __SMLAD(x1, y1, acc0);
270 acc2 = __SMLAD(x2, y1, acc2);
273 #ifndef ARM_MATH_BIG_ENDIAN 274 x3 = __PKHBT(x2, x1, 0);
276 x3 = __PKHBT(x1, x2, 0);
280 acc1 = __SMLADX(x3, y1, acc1);
286 #ifndef ARM_MATH_BIG_ENDIAN 287 x3 = __PKHBT(x1, x2, 0);
289 x3 = __PKHBT(x2, x1, 0);
292 acc3 = __SMLADX(x3, y1, acc3);
297 acc0 = __SMLAD(x2, y1, acc0);
299 acc2 = __SMLAD(x1, y1, acc2);
301 acc1 = __SMLADX(x3, y1, acc1);
305 #ifndef ARM_MATH_BIG_ENDIAN 306 x3 = __PKHBT(x2, x1, 0);
308 x3 = __PKHBT(x1, x2, 0);
311 acc3 = __SMLADX(x3, y1, acc3);
327 tapCnt = (srcBLen) & 3u;
333 acc0 += (*pScr1++ * *pScr2);
334 acc1 += (*pScr1++ * *pScr2);
335 acc2 += (*pScr1++ * *pScr2);
336 acc3 += (*pScr1++ * *pScr2++);
347 out0 = (
q7_t) (__SSAT(acc0 >> 7u, 8));
348 out1 = (
q7_t) (__SSAT(acc1 >> 7u, 8));
349 out2 = (
q7_t) (__SSAT(acc2 >> 7u, 8));
350 out3 = (
q7_t) (__SSAT(acc3 >> 7u, 8));
361 blkCnt = (numPoints) & 0x3;
372 tapCnt = (srcBLen) >> 1u;
383 acc0 = __SMLAD(x1, y1, acc0);
389 tapCnt = (srcBLen) & 1u;
396 acc0 += (*pScr1++ * *pScr2++);
405 *pOut++ = (
q7_t) (__SSAT(acc0 >> 7u, 8));
438 q15_t *pScr2, *pScr1;
441 uint32_t j, k, blkCnt, tapCnt;
444 q31_t acc0, acc1, acc2, acc3;
447 q15_t x10, x11, x20, x21;
451 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
462 if(srcALen >= srcBLen)
488 px = pIn2 + srcBLen - 1;
532 pScr1 += (srcBLen - 1u);
543 x4 = (
q15_t) * pIn1++;
545 x4 = (
q15_t) * pIn1++;
547 x4 = (
q15_t) * pIn1++;
549 x4 = (
q15_t) * pIn1++;
563 x4 = (
q15_t) * pIn1++;
571 k = (srcBLen - 1u) >> 2u;
589 k = (srcBLen - 1u) % 0x4u;
609 pOut = pDst + firstIndex;
611 pScratch1 += firstIndex;
614 blkCnt = (numPoints) >> 2;
636 tapCnt = (srcBLen) >> 2u;
646 acc0 += (
q31_t) x10 *y10;
647 acc0 += (
q31_t) x11 *y11;
648 acc2 += (
q31_t) x20 *y10;
649 acc2 += (
q31_t) x21 *y11;
652 acc1 += (
q31_t) x11 *y10;
653 acc1 += (
q31_t) x20 *y11;
660 acc3 += (
q31_t) x21 *y10;
661 acc3 += (
q31_t) x10 *y11;
668 acc0 += (
q31_t) x20 *y10;
669 acc0 += (
q31_t) x21 *y11;
670 acc2 += (
q31_t) x10 *y10;
671 acc2 += (
q31_t) x11 *y11;
672 acc1 += (
q31_t) x21 *y10;
673 acc1 += (
q31_t) x10 *y11;
680 acc3 += (
q31_t) x11 *y10;
681 acc3 += (
q31_t) x20 *y11;
699 tapCnt = (srcBLen) & 3u;
705 acc0 += (*pScr1++ * *pScr2);
706 acc1 += (*pScr1++ * *pScr2);
707 acc2 += (*pScr1++ * *pScr2);
708 acc3 += (*pScr1++ * *pScr2++);
719 *pOut++ = (
q7_t) (__SSAT(acc0 >> 7u, 8));
720 *pOut++ = (
q7_t) (__SSAT(acc1 >> 7u, 8));
721 *pOut++ = (
q7_t) (__SSAT(acc2 >> 7u, 8));
722 *pOut++ = (
q7_t) (__SSAT(acc3 >> 7u, 8));
731 blkCnt = (numPoints) & 0x3;
742 tapCnt = (srcBLen) >> 1u;
756 acc0 += (
q31_t) x10 *y10;
757 acc0 += (
q31_t) x11 *y11;
763 tapCnt = (srcBLen) & 1u;
770 acc0 += (*pScr1++ * *pScr2++);
779 *pOut++ = (
q7_t) (__SSAT(acc0 >> 7u, 8));
int8_t q7_t
8-bit fractional data type in 1.7 format.
#define __PACKq7(v0, v1, v2, v3)
definition to pack four 8 bit values.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
definition to read/write two 16 bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.
arm_status arm_conv_partial_opt_q7(q7_t *pSrcA, uint32_t srcALen, q7_t *pSrcB, uint32_t srcBLen, q7_t *pDst, uint32_t firstIndex, uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2)
Partial convolution of Q7 sequences.
void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize)
Fills a constant value into a Q15 vector.
arm_status
Error status returned by some functions in the library.