87 #if (defined(ARM_MATH_CM4) || defined(ARM_MATH_CM3)) && !defined(UNALIGNED_SUPPORT_DISABLE) 94 q63_t sum, acc0, acc1, acc2, acc3;
98 q31_t x0, x1, x2, x3, c0;
99 uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;
116 if(srcALen >= srcBLen)
125 outBlockSize = (2u * srcALen) - 1u;
131 j = outBlockSize - (srcALen + (srcBLen - 1u));
152 pOut = pDst + ((srcALen + srcBLen) - 2u);
168 blockSize1 = srcBLen - 1u;
169 blockSize2 = srcALen - (srcBLen - 1u);
170 blockSize3 = blockSize1;
190 pSrc1 = pIn2 + (srcBLen - 1u);
198 while(blockSize1 > 0u)
227 sum = __SMLALD(*px++, *py++, sum);
234 *pOut = (
q15_t) (__SSAT((sum >> 15), 16));
278 blkCnt = blockSize2 >> 2u;
306 acc0 = __SMLALD(x0, c0, acc0);
309 acc1 = __SMLALD(x1, c0, acc1);
318 acc2 = __SMLALD(x2, c0, acc2);
321 acc3 = __SMLALD(x3, c0, acc3);
327 acc0 = __SMLALD(x2, c0, acc0);
330 acc1 = __SMLALD(x3, c0, acc1);
341 acc2 = __SMLALD(x0, c0, acc2);
344 acc3 = __SMLALD(x1, c0, acc3);
356 #ifdef ARM_MATH_BIG_ENDIAN 362 c0 = c0 & 0x0000FFFF;
370 acc0 = __SMLALD(x0, c0, acc0);
371 acc1 = __SMLALD(x1, c0, acc1);
372 acc2 = __SMLALDX(x1, c0, acc2);
373 acc3 = __SMLALDX(x3, c0, acc3);
389 acc0 = __SMLALD(x0, c0, acc0);
390 acc1 = __SMLALD(x1, c0, acc1);
391 acc2 = __SMLALD(x3, c0, acc2);
392 acc3 = __SMLALD(x2, c0, acc3);
407 acc0 = __SMLALD(x0, c0, acc0);
408 acc1 = __SMLALD(x1, c0, acc1);
409 acc2 = __SMLALD(x3, c0, acc2);
410 acc3 = __SMLALD(x2, c0, acc3);
415 #ifdef ARM_MATH_BIG_ENDIAN 420 c0 = c0 & 0x0000FFFF;
427 acc0 = __SMLALDX(x1, c0, acc0);
428 acc1 = __SMLALD(x2, c0, acc1);
429 acc2 = __SMLALDX(x2, c0, acc2);
430 acc3 = __SMLALDX(x3, c0, acc3);
434 *pOut = (
q15_t) (__SSAT(acc0 >> 15, 16));
438 *pOut = (
q15_t) (__SSAT(acc1 >> 15, 16));
441 *pOut = (
q15_t) (__SSAT(acc2 >> 15, 16));
444 *pOut = (
q15_t) (__SSAT(acc3 >> 15, 16));
460 blkCnt = blockSize2 % 0x4u;
475 sum += ((
q63_t) * px++ * *py++);
476 sum += ((
q63_t) * px++ * *py++);
477 sum += ((
q63_t) * px++ * *py++);
478 sum += ((
q63_t) * px++ * *py++);
491 sum += ((
q63_t) * px++ * *py++);
498 *pOut = (
q15_t) (__SSAT(sum >> 15, 16));
530 sum += ((
q63_t) * px++ * *py++);
537 *pOut = (
q15_t) (__SSAT(sum >> 15, 16));
566 count = srcBLen - 1u;
569 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
579 while(blockSize3 > 0u)
608 sum = __SMLALD(*px++, *py++, sum);
615 *pOut = (
q15_t) (__SSAT((sum >> 15), 16));
635 q15_t *pIn2 = pSrcB + (srcBLen - 1u);
658 tot = ((srcALen + srcBLen) - 2u);
660 if(srcALen > srcBLen)
663 j = srcALen - srcBLen;
669 else if(srcALen < srcBLen)
675 pIn2 = pSrcA + (srcALen - 1u);
691 for (i = 0u; i <= tot; i++)
697 for (j = 0u; j <= i; j++)
700 if((((i - j) < srcBLen) && (j < srcALen)))
703 sum += ((
q31_t) pIn1[j] * pIn2[-((int32_t) i - j)]);
708 *pDst-- = (
q15_t) __SSAT((sum >> 15u), 16u);
710 *pDst++ = (
q15_t) __SSAT((sum >> 15u), 16u);
int64_t q63_t
64-bit fractional data type in 1.63 format.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
definition to read/write two 16 bit values.
void arm_correlate_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
Correlation of Q15 sequences.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.