92 q31_t acc0, acc1, acc2, acc3;
96 q15_t *pScr1 = pScratch1;
97 q15_t *pScr2 = pScratch2;
102 uint32_t j, k, blkCnt;
104 #ifdef UNALIGNED_SUPPORT_DISABLE 113 if(srcALen >= srcBLen)
136 pScr2 = pScratch2 + srcBLen - 1;
181 pScr1 += (srcBLen - 1u);
185 #ifndef UNALIGNED_SUPPORT_DISABLE 228 #ifndef UNALIGNED_SUPPORT_DISABLE 234 pScr1 += (srcBLen - 1u);
239 k = (srcBLen - 1u) >> 2u;
257 k = (srcBLen - 1u) % 0x4u;
281 blkCnt = (srcALen + srcBLen - 1u) >> 2;
300 tapCnt = (srcBLen) >> 2u;
305 #ifndef UNALIGNED_SUPPORT_DISABLE 312 acc0 = __SMLAD(x1, y1, acc0);
313 acc2 = __SMLAD(x2, y1, acc2);
316 #ifndef ARM_MATH_BIG_ENDIAN 317 x3 = __PKHBT(x2, x1, 0);
319 x3 = __PKHBT(x1, x2, 0);
323 acc1 = __SMLADX(x3, y1, acc1);
329 acc0 = __SMLAD(x2, y2, acc0);
330 acc2 = __SMLAD(x1, y2, acc2);
333 #ifndef ARM_MATH_BIG_ENDIAN 334 x3 = __PKHBT(x1, x2, 0);
336 x3 = __PKHBT(x2, x1, 0);
339 acc3 = __SMLADX(x3, y1, acc3);
340 acc1 = __SMLADX(x3, y2, acc1);
344 #ifndef ARM_MATH_BIG_ENDIAN 345 x3 = __PKHBT(x2, x1, 0);
347 x3 = __PKHBT(x1, x2, 0);
350 acc3 = __SMLADX(x3, y2, acc3);
358 #ifndef ARM_MATH_BIG_ENDIAN 359 y1 = __PKHBT(a, b, 16);
361 y1 = __PKHBT(b, a, 16);
366 #ifndef ARM_MATH_BIG_ENDIAN 367 y2 = __PKHBT(a, b, 16);
369 y2 = __PKHBT(b, a, 16);
372 acc0 = __SMLAD(x1, y1, acc0);
374 acc2 = __SMLAD(x2, y1, acc2);
376 #ifndef ARM_MATH_BIG_ENDIAN 377 x3 = __PKHBT(x2, x1, 0);
379 x3 = __PKHBT(x1, x2, 0);
382 acc1 = __SMLADX(x3, y1, acc1);
387 #ifndef ARM_MATH_BIG_ENDIAN 388 x1 = __PKHBT(a, b, 16);
390 x1 = __PKHBT(b, a, 16);
393 acc0 = __SMLAD(x2, y2, acc0);
395 acc2 = __SMLAD(x1, y2, acc2);
397 #ifndef ARM_MATH_BIG_ENDIAN 398 x3 = __PKHBT(x1, x2, 0);
400 x3 = __PKHBT(x2, x1, 0);
403 acc3 = __SMLADX(x3, y1, acc3);
405 acc1 = __SMLADX(x3, y2, acc1);
410 #ifndef ARM_MATH_BIG_ENDIAN 411 x2 = __PKHBT(a, b, 16);
413 x2 = __PKHBT(b, a, 16);
416 #ifndef ARM_MATH_BIG_ENDIAN 417 x3 = __PKHBT(x2, x1, 0);
419 x3 = __PKHBT(x1, x2, 0);
422 acc3 = __SMLADX(x3, y2, acc3);
439 tapCnt = (srcBLen) & 3u;
445 acc0 += (*pScr1++ * *pIn2);
446 acc1 += (*pScr1++ * *pIn2);
447 acc2 += (*pScr1++ * *pIn2);
448 acc3 += (*pScr1++ * *pIn2++);
461 #ifndef ARM_MATH_BIG_ENDIAN 464 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
467 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
473 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
476 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
490 blkCnt = (srcALen + srcBLen - 1u) & 0x3;
501 tapCnt = (srcBLen) >> 1u;
506 acc0 += (*pScr1++ * *pIn2++);
507 acc0 += (*pScr1++ * *pIn2++);
513 tapCnt = (srcBLen) & 1u;
520 acc0 += (*pScr1++ * *pIn2++);
530 *pOut++ = (
q15_t) (__SSAT((acc0 >> 15), 16));
void arm_copy_q15(q15_t *pSrc, q15_t *pDst, uint32_t blockSize)
Copies the elements of a Q15 vector.
int16_t q15_t
16-bit fractional data type in 1.15 format.
void arm_conv_fast_opt_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst, q15_t *pScratch1, q15_t *pScratch2)
Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
#define __SIMD32(addr)
definition to read/write two 16 bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.
void arm_fill_q15(q15_t value, q15_t *pDst, uint32_t blockSize)
Fills a constant value into a Q15 vector.