73 #ifndef UNALIGNED_SUPPORT_DISABLE
88 q15_t *pScr1 = pScratch1;
89 q15_t *pScr2 = pScratch2;
90 q31_t acc0, acc1, acc2, acc3;
97 uint32_t j, k, blkCnt;
103 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
114 if(srcALen >= srcBLen)
140 pScr2 = pScratch2 + srcBLen - 1;
185 pScr1 += (srcBLen - 1u);
199 pScr1 += (srcBLen - 1u);
204 pScratch1 += firstIndex;
206 pOut = pDst + firstIndex;
212 blkCnt = (numPoints) >> 2;
231 tapCnt = (srcBLen) >> 2u;
241 acc0 = __SMLAD(x1, y1, acc0);
242 acc2 = __SMLAD(x2, y1, acc2);
245 #ifndef ARM_MATH_BIG_ENDIAN
246 x3 = __PKHBT(x2, x1, 0);
248 x3 = __PKHBT(x1, x2, 0);
252 acc1 = __SMLADX(x3, y1, acc1);
258 acc0 = __SMLAD(x2, y2, acc0);
260 acc2 = __SMLAD(x1, y2, acc2);
263 #ifndef ARM_MATH_BIG_ENDIAN
264 x3 = __PKHBT(x1, x2, 0);
266 x3 = __PKHBT(x2, x1, 0);
269 acc3 = __SMLADX(x3, y1, acc3);
270 acc1 = __SMLADX(x3, y2, acc1);
274 #ifndef ARM_MATH_BIG_ENDIAN
275 x3 = __PKHBT(x2, x1, 0);
277 x3 = __PKHBT(x1, x2, 0);
280 acc3 = __SMLADX(x3, y2, acc3);
295 tapCnt = (srcBLen) & 3u;
301 acc0 += (*pScr1++ * *pIn2);
302 acc1 += (*pScr1++ * *pIn2);
303 acc2 += (*pScr1++ * *pIn2);
304 acc3 += (*pScr1++ * *pIn2++);
317 #ifndef ARM_MATH_BIG_ENDIAN
320 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
322 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
327 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
329 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
341 blkCnt = numPoints & 0x3;
352 tapCnt = (srcBLen) >> 1u;
363 acc0 = __SMLAD(x1, y1, acc0);
369 tapCnt = (srcBLen) & 1u;
376 acc0 += (*pScr1++ * *pIn2++);
386 *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
416 q15_t *pScr1 = pScratch1;
417 q15_t *pScr2 = pScratch2;
418 q31_t acc0, acc1, acc2, acc3;
423 uint32_t j, k, blkCnt;
426 q15_t x10, x11, x20, x21;
431 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
442 if(srcALen >= srcBLen)
468 pScr2 = pScratch2 + srcBLen - 1;
510 pScr1 += (srcBLen - 1u);
547 k = (srcBLen - 1u) >> 2u;
565 k = (srcBLen - 1u) % 0x4u;
580 pScratch1 += firstIndex;
582 pOut = pDst + firstIndex;
585 blkCnt = (numPoints) >> 2;
606 tapCnt = (srcBLen) >> 2u;
616 acc0 += (q31_t) x10 *y10;
617 acc0 += (q31_t) x11 *y11;
618 acc2 += (q31_t) x20 *y10;
619 acc2 += (q31_t) x21 *y11;
622 acc1 += (q31_t) x11 *y10;
623 acc1 += (q31_t) x20 *y11;
630 acc3 += (q31_t) x21 *y10;
631 acc3 += (q31_t) x10 *y11;
638 acc0 += (q31_t) x20 *y10;
639 acc0 += (q31_t) x21 *y11;
640 acc2 += (q31_t) x10 *y10;
641 acc2 += (q31_t) x11 *y11;
642 acc1 += (q31_t) x21 *y10;
643 acc1 += (q31_t) x10 *y11;
650 acc3 += (q31_t) x11 *y10;
651 acc3 += (q31_t) x20 *y11;
665 tapCnt = (srcBLen) & 3u;
670 acc0 += (*pScr1++ * *pIn2);
671 acc1 += (*pScr1++ * *pIn2);
672 acc2 += (*pScr1++ * *pIn2);
673 acc3 += (*pScr1++ * *pIn2++);
685 *pOut++ = __SSAT((acc0 >> 15), 16);
686 *pOut++ = __SSAT((acc1 >> 15), 16);
687 *pOut++ = __SSAT((acc2 >> 15), 16);
688 *pOut++ = __SSAT((acc3 >> 15), 16);
698 blkCnt = numPoints & 0x3;
709 tapCnt = (srcBLen) >> 1u;
723 acc0 += (q31_t) x10 *y10;
724 acc0 += (q31_t) x11 *y11;
730 tapCnt = (srcBLen) & 1u;
737 acc0 += (*pScr1++ * *pIn2++);
746 *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
void arm_copy_q15(q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Copies the elements of a Q15 vector.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
Definition to read/write two 16-bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.
arm_status arm_conv_partial_fast_opt_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex, uint32_t numPoints, q15_t *pScratch1, q15_t *pScratch2)
Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize)
Fills a constant value into a Q15 vector.
arm_status
Error status returned by some functions in the library.