84 #ifndef UNALIGNED_SUPPORT_DISABLE 89 q31_t sum, acc0, acc1, acc2, acc3;
93 q31_t x0, x1, x2, x3, c0;
94 uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;
111 if(srcALen >= srcBLen)
120 outBlockSize = (2u * srcALen) - 1u;
126 j = outBlockSize - (srcALen + (srcBLen - 1u));
147 pOut = pDst + ((srcALen + srcBLen) - 2u);
163 blockSize1 = srcBLen - 1u;
164 blockSize2 = srcALen - (srcBLen - 1u);
165 blockSize3 = blockSize1;
185 pSrc1 = pIn2 + (srcBLen - 1u);
193 while(blockSize1 > 0u)
222 sum = __SMLAD(*px++, *py++, sum);
229 *pOut = (
q15_t) (sum >> 15);
273 blkCnt = blockSize2 >> 2u;
301 acc0 = __SMLAD(x0, c0, acc0);
304 acc1 = __SMLAD(x1, c0, acc1);
313 acc2 = __SMLAD(x2, c0, acc2);
316 acc3 = __SMLAD(x3, c0, acc3);
322 acc0 = __SMLAD(x2, c0, acc0);
325 acc1 = __SMLAD(x3, c0, acc1);
335 acc2 = __SMLAD(x0, c0, acc2);
338 acc3 = __SMLAD(x1, c0, acc3);
353 #ifdef ARM_MATH_BIG_ENDIAN 359 c0 = c0 & 0x0000FFFF;
368 acc0 = __SMLAD(x0, c0, acc0);
369 acc1 = __SMLAD(x1, c0, acc1);
370 acc2 = __SMLADX(x1, c0, acc2);
371 acc3 = __SMLADX(x3, c0, acc3);
387 acc0 = __SMLAD(x0, c0, acc0);
388 acc1 = __SMLAD(x1, c0, acc1);
389 acc2 = __SMLAD(x3, c0, acc2);
390 acc3 = __SMLAD(x2, c0, acc3);
405 acc0 = __SMLAD(x0, c0, acc0);
406 acc1 = __SMLAD(x1, c0, acc1);
407 acc2 = __SMLAD(x3, c0, acc2);
408 acc3 = __SMLAD(x2, c0, acc3);
412 #ifdef ARM_MATH_BIG_ENDIAN 417 c0 = c0 & 0x0000FFFF;
425 acc0 = __SMLADX(x1, c0, acc0);
426 acc1 = __SMLAD(x2, c0, acc1);
427 acc2 = __SMLADX(x2, c0, acc2);
428 acc3 = __SMLADX(x3, c0, acc3);
432 *pOut = (
q15_t) (acc0 >> 15);
436 *pOut = (
q15_t) (acc1 >> 15);
439 *pOut = (
q15_t) (acc2 >> 15);
442 *pOut = (
q15_t) (acc3 >> 15);
459 blkCnt = blockSize2 % 0x4u;
474 sum += ((
q31_t) * px++ * *py++);
475 sum += ((
q31_t) * px++ * *py++);
476 sum += ((
q31_t) * px++ * *py++);
477 sum += ((
q31_t) * px++ * *py++);
490 sum += ((
q31_t) * px++ * *py++);
497 *pOut = (
q15_t) (sum >> 15);
529 sum += ((
q31_t) * px++ * *py++);
536 *pOut = (
q15_t) (sum >> 15);
565 count = srcBLen - 1u;
568 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
578 while(blockSize3 > 0u)
607 sum = __SMLAD(*px++, *py++, sum);
614 *pOut = (
q15_t) (sum >> 15);
634 q31_t sum, acc0, acc1, acc2, acc3;
638 q31_t x0, x1, x2, x3, c0;
639 uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;
657 if(srcALen >= srcBLen)
666 outBlockSize = (2u * srcALen) - 1u;
672 j = outBlockSize - (srcALen + (srcBLen - 1u));
693 pOut = pDst + ((srcALen + srcBLen) - 2u);
709 blockSize1 = srcBLen - 1u;
710 blockSize2 = srcALen - (srcBLen - 1u);
711 blockSize3 = blockSize1;
731 pSrc1 = pIn2 + (srcBLen - 1u);
739 while(blockSize1 > 0u)
752 sum += ((
q31_t) * px++ * *py++);
753 sum += ((
q31_t) * px++ * *py++);
754 sum += ((
q31_t) * px++ * *py++);
755 sum += ((
q31_t) * px++ * *py++);
769 sum += ((
q31_t) * px++ * *py++);
776 *pOut = (
q15_t) (sum >> 15);
820 blkCnt = blockSize2 >> 2u;
834 #ifndef ARM_MATH_BIG_ENDIAN 836 x0 = __PKHBT(a, b, 16);
838 x1 = __PKHBT(b, a, 16);
842 x0 = __PKHBT(b, a, 16);
844 x1 = __PKHBT(a, b, 16);
862 #ifndef ARM_MATH_BIG_ENDIAN 864 c0 = __PKHBT(a, b, 16);
868 c0 = __PKHBT(b, a, 16);
873 acc0 = __SMLAD(x0, c0, acc0);
876 acc1 = __SMLAD(x1, c0, acc1);
882 #ifndef ARM_MATH_BIG_ENDIAN 884 x2 = __PKHBT(a, b, 16);
886 x3 = __PKHBT(b, a, 16);
890 x2 = __PKHBT(b, a, 16);
892 x3 = __PKHBT(a, b, 16);
897 acc2 = __SMLAD(x2, c0, acc2);
900 acc3 = __SMLAD(x3, c0, acc3);
908 #ifndef ARM_MATH_BIG_ENDIAN 910 c0 = __PKHBT(a, b, 16);
914 c0 = __PKHBT(b, a, 16);
919 acc0 = __SMLAD(x2, c0, acc0);
922 acc1 = __SMLAD(x3, c0, acc1);
928 #ifndef ARM_MATH_BIG_ENDIAN 930 x0 = __PKHBT(a, b, 16);
932 x1 = __PKHBT(b, a, 16);
936 x0 = __PKHBT(b, a, 16);
938 x1 = __PKHBT(a, b, 16);
945 acc2 = __SMLAD(x0, c0, acc2);
948 acc3 = __SMLAD(x1, c0, acc3);
963 #ifdef ARM_MATH_BIG_ENDIAN 969 c0 = c0 & 0x0000FFFF;
979 #ifndef ARM_MATH_BIG_ENDIAN 981 x3 = __PKHBT(a, b, 16);
985 x3 = __PKHBT(b, a, 16);
992 acc0 = __SMLAD(x0, c0, acc0);
993 acc1 = __SMLAD(x1, c0, acc1);
994 acc2 = __SMLADX(x1, c0, acc2);
995 acc3 = __SMLADX(x3, c0, acc3);
1004 #ifndef ARM_MATH_BIG_ENDIAN 1006 c0 = __PKHBT(a, b, 16);
1010 c0 = __PKHBT(b, a, 16);
1018 #ifndef ARM_MATH_BIG_ENDIAN 1020 x3 = __PKHBT(a, b, 16);
1022 x2 = __PKHBT(b, a, 16);
1026 x3 = __PKHBT(b, a, 16);
1028 x2 = __PKHBT(a, b, 16);
1035 acc0 = __SMLAD(x0, c0, acc0);
1036 acc1 = __SMLAD(x1, c0, acc1);
1037 acc2 = __SMLAD(x3, c0, acc2);
1038 acc3 = __SMLAD(x2, c0, acc3);
1047 #ifndef ARM_MATH_BIG_ENDIAN 1049 c0 = __PKHBT(a, b, 16);
1053 c0 = __PKHBT(b, a, 16);
1063 #ifndef ARM_MATH_BIG_ENDIAN 1065 x3 = __PKHBT(a, b, 16);
1067 x2 = __PKHBT(b, a, 16);
1071 x3 = __PKHBT(b, a, 16);
1073 x2 = __PKHBT(a, b, 16);
1078 acc0 = __SMLAD(x0, c0, acc0);
1079 acc1 = __SMLAD(x1, c0, acc1);
1080 acc2 = __SMLAD(x3, c0, acc2);
1081 acc3 = __SMLAD(x2, c0, acc3);
1085 #ifdef ARM_MATH_BIG_ENDIAN 1090 c0 = c0 & 0x0000FFFF;
1096 #ifndef ARM_MATH_BIG_ENDIAN 1098 x3 = __PKHBT(a, b, 16);
1102 x3 = __PKHBT(b, a, 16);
1109 acc0 = __SMLADX(x1, c0, acc0);
1110 acc1 = __SMLAD(x2, c0, acc1);
1111 acc2 = __SMLADX(x2, c0, acc2);
1112 acc3 = __SMLADX(x3, c0, acc3);
1116 *pOut = (
q15_t) (acc0 >> 15);
1120 *pOut = (
q15_t) (acc1 >> 15);
1123 *pOut = (
q15_t) (acc2 >> 15);
1126 *pOut = (
q15_t) (acc3 >> 15);
1143 blkCnt = blockSize2 % 0x4u;
1158 sum += ((
q31_t) * px++ * *py++);
1159 sum += ((
q31_t) * px++ * *py++);
1160 sum += ((
q31_t) * px++ * *py++);
1161 sum += ((
q31_t) * px++ * *py++);
1174 sum += ((
q31_t) * px++ * *py++);
1181 *pOut = (
q15_t) (sum >> 15);
1200 blkCnt = blockSize2;
1213 sum += ((
q31_t) * px++ * *py++);
1220 *pOut = (
q15_t) (sum >> 15);
1249 count = srcBLen - 1u;
1252 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
1262 while(blockSize3 > 0u)
1275 sum += ((
q31_t) * px++ * *py++);
1276 sum += ((
q31_t) * px++ * *py++);
1277 sum += ((
q31_t) * px++ * *py++);
1278 sum += ((
q31_t) * px++ * *py++);
1291 sum += ((
q31_t) * px++ * *py++);
1298 *pOut = (
q15_t) (sum >> 15);
void arm_correlate_fast_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
Definition to read/write two 16-bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.