74 #ifndef UNALIGNED_SUPPORT_DISABLE 89 q15_t *pScr1 = pScratch1;
90 q15_t *pScr2 = pScratch2;
91 q63_t acc0, acc1, acc2, acc3;
98 uint32_t j, k, blkCnt;
103 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
114 if(srcALen >= srcBLen)
140 pScr2 = pScratch2 + srcBLen - 1;
182 pScr1 += (srcBLen - 1u);
196 pScr1 += (srcBLen - 1u);
201 pScratch1 += firstIndex;
203 pOut = pDst + firstIndex;
206 blkCnt = (numPoints) >> 2;
225 tapCnt = (srcBLen) >> 2u;
235 acc0 = __SMLALD(x1, y1, acc0);
236 acc2 = __SMLALD(x2, y1, acc2);
239 #ifndef ARM_MATH_BIG_ENDIAN 240 x3 = __PKHBT(x2, x1, 0);
242 x3 = __PKHBT(x1, x2, 0);
246 acc1 = __SMLALDX(x3, y1, acc1);
252 acc0 = __SMLALD(x2, y2, acc0);
253 acc2 = __SMLALD(x1, y2, acc2);
256 #ifndef ARM_MATH_BIG_ENDIAN 257 x3 = __PKHBT(x1, x2, 0);
259 x3 = __PKHBT(x2, x1, 0);
262 acc3 = __SMLALDX(x3, y1, acc3);
263 acc1 = __SMLALDX(x3, y2, acc1);
267 #ifndef ARM_MATH_BIG_ENDIAN 268 x3 = __PKHBT(x2, x1, 0);
270 x3 = __PKHBT(x1, x2, 0);
273 acc3 = __SMLALDX(x3, y2, acc3);
288 tapCnt = (srcBLen) & 3u;
293 acc0 += (*pScr1++ * *pIn2);
294 acc1 += (*pScr1++ * *pIn2);
295 acc2 += (*pScr1++ * *pIn2);
296 acc3 += (*pScr1++ * *pIn2++);
309 #ifndef ARM_MATH_BIG_ENDIAN 312 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
314 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
319 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
321 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
333 blkCnt = numPoints & 0x3;
344 tapCnt = (srcBLen) >> 1u;
355 acc0 = __SMLALD(x1, y1, acc0);
361 tapCnt = (srcBLen) & 1u;
368 acc0 += (*pScr1++ * *pIn2++);
377 *pOut++ = (
q15_t) (__SSAT((acc0 >> 15), 16));
410 q15_t *pScr1 = pScratch1;
411 q15_t *pScr2 = pScratch2;
412 q63_t acc0, acc1, acc2, acc3;
417 uint32_t j, k, blkCnt;
420 q15_t x10, x11, x20, x21;
425 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
436 if(srcALen >= srcBLen)
462 pScr2 = pScratch2 + srcBLen - 1;
504 pScr1 += (srcBLen - 1u);
541 k = (srcBLen - 1u) >> 2u;
559 k = (srcBLen - 1u) % 0x4u;
574 pScratch1 += firstIndex;
576 pOut = pDst + firstIndex;
579 blkCnt = (numPoints) >> 2;
600 tapCnt = (srcBLen) >> 2u;
610 acc0 += (
q63_t) x10 *y10;
611 acc0 += (
q63_t) x11 *y11;
612 acc2 += (
q63_t) x20 *y10;
613 acc2 += (
q63_t) x21 *y11;
616 acc1 += (
q63_t) x11 *y10;
617 acc1 += (
q63_t) x20 *y11;
624 acc3 += (
q63_t) x21 *y10;
625 acc3 += (
q63_t) x10 *y11;
632 acc0 += (
q63_t) x20 *y10;
633 acc0 += (
q63_t) x21 *y11;
634 acc2 += (
q63_t) x10 *y10;
635 acc2 += (
q63_t) x11 *y11;
636 acc1 += (
q63_t) x21 *y10;
637 acc1 += (
q63_t) x10 *y11;
644 acc3 += (
q63_t) x11 *y10;
645 acc3 += (
q63_t) x20 *y11;
659 tapCnt = (srcBLen) & 3u;
664 acc0 += (*pScr1++ * *pIn2);
665 acc1 += (*pScr1++ * *pIn2);
666 acc2 += (*pScr1++ * *pIn2);
667 acc3 += (*pScr1++ * *pIn2++);
679 *pOut++ = __SSAT((acc0 >> 15), 16);
680 *pOut++ = __SSAT((acc1 >> 15), 16);
681 *pOut++ = __SSAT((acc2 >> 15), 16);
682 *pOut++ = __SSAT((acc3 >> 15), 16);
693 blkCnt = numPoints & 0x3;
704 tapCnt = (srcBLen) >> 1u;
718 acc0 += (
q63_t) x10 *y10;
719 acc0 += (
q63_t) x11 *y11;
725 tapCnt = (srcBLen) & 1u;
732 acc0 += (*pScr1++ * *pIn2++);
741 *pOut++ = (
q15_t) (__SSAT((acc0 >> 15), 16));
int64_t q63_t
64-bit fractional data type in 1.63 format.
arm_status arm_conv_partial_opt_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex, uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2)
Partial convolution of Q15 sequences.
void arm_copy_q15(q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Copies the elements of a Q15 vector.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
Definition to read/write two 16-bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.
void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize)
Fills a constant value into a Q15 vector.
arm_status
Error status returned by some functions in the library.