87 #if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) 94 q63_t sum, acc0, acc1, acc2, acc3;
98 q31_t x0, x1, x2, x3, c0;
99 uint32_t blockSize1, blockSize2, blockSize3, j, k, count, blkCnt;
104 if(srcALen >= srcBLen)
137 blockSize1 = srcBLen - 1u;
138 blockSize2 = srcALen - (srcBLen - 1u);
170 while((count < 4u) && (blockSize1 > 0u))
182 sum = __SMLALD(*px++, *py--, sum);
189 *pOut++ = (
q15_t) (__SSAT((sum >> 15), 16));
208 while(blockSize1 > 0u)
241 sum = __SMLALD(*px++, *py--, sum);
248 *pOut++ = (
q15_t) (__SSAT((sum >> 15), 16));
251 py = pIn2 + (count - 1u);
275 pSrc2 = pIn2 + (srcBLen - 1u);
292 blkCnt = blockSize2 >> 2u;
324 acc0 = __SMLALDX(x0, c0, acc0);
327 acc1 = __SMLALDX(x1, c0, acc1);
336 acc2 = __SMLALDX(x2, c0, acc2);
339 acc3 = __SMLALDX(x3, c0, acc3);
345 acc0 = __SMLALDX(x2, c0, acc0);
348 acc1 = __SMLALDX(x3, c0, acc1);
358 acc2 = __SMLALDX(x0, c0, acc2);
361 acc3 = __SMLALDX(x1, c0, acc3);
377 #ifdef ARM_MATH_BIG_ENDIAN 383 c0 = c0 & 0x0000FFFF;
391 acc0 = __SMLALD(x0, c0, acc0);
392 acc1 = __SMLALD(x1, c0, acc1);
393 acc2 = __SMLALDX(x1, c0, acc2);
394 acc3 = __SMLALDX(x3, c0, acc3);
410 acc0 = __SMLALDX(x0, c0, acc0);
411 acc1 = __SMLALDX(x1, c0, acc1);
412 acc2 = __SMLALDX(x3, c0, acc2);
413 acc3 = __SMLALDX(x2, c0, acc3);
428 acc0 = __SMLALDX(x0, c0, acc0);
429 acc1 = __SMLALDX(x1, c0, acc1);
430 acc2 = __SMLALDX(x3, c0, acc2);
431 acc3 = __SMLALDX(x2, c0, acc3);
435 #ifdef ARM_MATH_BIG_ENDIAN 440 c0 = c0 & 0x0000FFFF;
447 acc0 = __SMLALDX(x1, c0, acc0);
448 acc1 = __SMLALD(x2, c0, acc1);
449 acc2 = __SMLALDX(x2, c0, acc2);
450 acc3 = __SMLALDX(x3, c0, acc3);
456 #ifndef ARM_MATH_BIG_ENDIAN 459 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
461 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
466 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
468 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
485 blkCnt = blockSize2 % 0x4u;
523 *pOut++ = (
q15_t) (__SSAT(sum >> 15, 16));
560 *pOut++ = (
q15_t) (__SSAT(sum >> 15, 16));
589 blockSize3 = srcBLen - 1u;
592 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
596 pSrc2 = pIn2 + (srcBLen - 1u);
609 j = blockSize3 >> 2u;
611 while((j > 0u) && (blockSize3 > 0u))
617 k = blockSize3 >> 2u;
640 k = blockSize3 % 0x4u;
645 sum = __SMLALD(*px++, *py--, sum);
652 *pOut++ = (
q15_t) (__SSAT((sum >> 15), 16));
669 while(blockSize3 > 0u)
681 sum = __SMLALD(*px++, *py--, sum);
688 *pOut++ = (
q15_t) (__SSAT((sum >> 15), 16));
708 for (i = 0; i < (srcALen + srcBLen - 1); i++)
714 for (j = 0; j <= i; j++)
717 if(((i - j) < srcBLen) && (j < srcALen))
720 sum += (
q31_t) pIn1[j] * (pIn2[i - j]);
725 pDst[i] = (
q15_t) __SSAT((sum >> 15u), 16u);
int64_t q63_t
64-bit fractional data type in 1.63 format.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
Definition to read/write two 16-bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.
void arm_conv_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
Convolution of Q15 sequences.