81 #if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE)
88 q63_t sum, acc0, acc1, acc2, acc3;
92 q31_t x0, x1, x2, x3, c0;
93 uint32_t j, k, count, check, blkCnt;
94 int32_t blockSize1, blockSize2, blockSize3;
98 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
109 if(srcALen >= srcBLen)
133 check = firstIndex + numPoints;
134 blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
135 blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
136 blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex);
137 blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
138 (int32_t) numPoints) : 0;
139 blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +
140 (int32_t) firstIndex);
141 blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
154 pOut = pDst + firstIndex;
170 count = 1u + firstIndex;
176 pSrc2 = pIn2 + firstIndex;
188 while((count < 4u) && (blockSize1 > 0))
200 sum = __SMLALD(*px++, *py--, sum);
207 *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
226 while(blockSize1 > 0)
259 sum = __SMLALD(*px++, *py--, sum);
266 *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
290 if((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
292 px = pIn1 + firstIndex - srcBLen + 1;
300 pSrc2 = pIn2 + (srcBLen - 1u);
317 blkCnt = blockSize2 >> 2u;
349 acc0 = __SMLALDX(x0, c0, acc0);
352 acc1 = __SMLALDX(x1, c0, acc1);
361 acc2 = __SMLALDX(x2, c0, acc2);
364 acc3 = __SMLALDX(x3, c0, acc3);
370 acc0 = __SMLALDX(x2, c0, acc0);
373 acc1 = __SMLALDX(x3, c0, acc1);
383 acc2 = __SMLALDX(x0, c0, acc2);
386 acc3 = __SMLALDX(x1, c0, acc3);
402 #ifdef ARM_MATH_BIG_ENDIAN
408 c0 = c0 & 0x0000FFFF;
417 acc0 = __SMLALD(x0, c0, acc0);
418 acc1 = __SMLALD(x1, c0, acc1);
419 acc2 = __SMLALDX(x1, c0, acc2);
420 acc3 = __SMLALDX(x3, c0, acc3);
436 acc0 = __SMLALDX(x0, c0, acc0);
437 acc1 = __SMLALDX(x1, c0, acc1);
438 acc2 = __SMLALDX(x3, c0, acc2);
439 acc3 = __SMLALDX(x2, c0, acc3);
454 acc0 = __SMLALDX(x0, c0, acc0);
455 acc1 = __SMLALDX(x1, c0, acc1);
456 acc2 = __SMLALDX(x3, c0, acc2);
457 acc3 = __SMLALDX(x2, c0, acc3);
461 #ifdef ARM_MATH_BIG_ENDIAN
466 c0 = c0 & 0x0000FFFF;
474 acc0 = __SMLALDX(x1, c0, acc0);
475 acc1 = __SMLALD(x2, c0, acc1);
476 acc2 = __SMLALDX(x2, c0, acc2);
477 acc3 = __SMLALDX(x3, c0, acc3);
483 #ifndef ARM_MATH_BIG_ENDIAN
486 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
488 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
493 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
495 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
512 blkCnt = (uint32_t) blockSize2 % 0x4u;
550 *pOut++ = (q15_t) (__SSAT(sum >> 15, 16));
567 blkCnt = (uint32_t) blockSize2;
587 *pOut++ = (q15_t) (__SSAT(sum >> 15, 16));
615 count = srcBLen - 1u;
618 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
622 pSrc2 = pIn2 + (srcBLen - 1u);
637 while((j > 0u) && (blockSize3 > 0))
671 sum = __SMLALD(*px++, *py--, sum);
678 *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
698 while(blockSize3 > 0)
710 sum = __SMLALD(*px++, *py--, sum);
717 *pOut++ = (q15_t) (__SSAT((sum >> 15), 16));
748 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
756 for (i = firstIndex; i <= (firstIndex + numPoints - 1); i++)
762 for (j = 0; j <= i; j++)
765 if(((i - j) < srcBLen) && (j < srcALen))
768 sum += ((q31_t) pIn1[j] * (pIn2[i - j]));
773 pDst[i] = (q15_t) __SSAT((sum >> 15u), 16u);
int64_t q63_t
64-bit fractional data type in 1.63 format.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
Definition to read/write two 16-bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.
arm_status arm_conv_partial_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex, uint32_t numPoints)
Partial convolution of Q15 sequences.
arm_status
Error status returned by some functions in the library.