84 #ifndef UNALIGNED_SUPPORT_DISABLE 88 q31_t sum, acc0, acc1, acc2, acc3;
92 q31_t x0, x1, x2, x3, c0;
93 uint32_t blockSize1, blockSize2, blockSize3, j, k, count, blkCnt;
98 if(srcALen >= srcBLen)
131 blockSize1 = srcBLen - 1u;
132 blockSize2 = srcALen - (srcBLen - 1u);
133 blockSize3 = blockSize1;
165 while((count < 4u) && (blockSize1 > 0u))
177 sum = __SMLAD(*px++, *py--, sum);
184 *pOut++ = (
q15_t) (sum >> 15);
203 while(blockSize1 > 0u)
236 sum = __SMLAD(*px++, *py--, sum);
243 *pOut++ = (
q15_t) (sum >> 15);
246 py = pIn2 + (count - 1u);
270 pSrc2 = pIn2 + (srcBLen - 1u);
287 blkCnt = blockSize2 >> 2u;
319 acc0 = __SMLADX(x0, c0, acc0);
322 acc1 = __SMLADX(x1, c0, acc1);
331 acc2 = __SMLADX(x2, c0, acc2);
334 acc3 = __SMLADX(x3, c0, acc3);
340 acc0 = __SMLADX(x2, c0, acc0);
343 acc1 = __SMLADX(x3, c0, acc1);
353 acc2 = __SMLADX(x0, c0, acc2);
356 acc3 = __SMLADX(x1, c0, acc3);
372 #ifdef ARM_MATH_BIG_ENDIAN 378 c0 = c0 & 0x0000FFFF;
387 acc0 = __SMLAD(x0, c0, acc0);
388 acc1 = __SMLAD(x1, c0, acc1);
389 acc2 = __SMLADX(x1, c0, acc2);
390 acc3 = __SMLADX(x3, c0, acc3);
406 acc0 = __SMLADX(x0, c0, acc0);
407 acc1 = __SMLADX(x1, c0, acc1);
408 acc2 = __SMLADX(x3, c0, acc2);
409 acc3 = __SMLADX(x2, c0, acc3);
424 acc0 = __SMLADX(x0, c0, acc0);
425 acc1 = __SMLADX(x1, c0, acc1);
426 acc2 = __SMLADX(x3, c0, acc2);
427 acc3 = __SMLADX(x2, c0, acc3);
431 #ifdef ARM_MATH_BIG_ENDIAN 436 c0 = c0 & 0x0000FFFF;
444 acc0 = __SMLADX(x1, c0, acc0);
445 acc1 = __SMLAD(x2, c0, acc1);
446 acc2 = __SMLADX(x2, c0, acc2);
447 acc3 = __SMLADX(x3, c0, acc3);
451 #ifndef ARM_MATH_BIG_ENDIAN 453 *
__SIMD32(pOut)++ = __PKHBT((acc0 >> 15), (acc1 >> 15), 16);
454 *
__SIMD32(pOut)++ = __PKHBT((acc2 >> 15), (acc3 >> 15), 16);
458 *
__SIMD32(pOut)++ = __PKHBT((acc1 >> 15), (acc0 >> 15), 16);
459 *
__SIMD32(pOut)++ = __PKHBT((acc3 >> 15), (acc2 >> 15), 16);
476 blkCnt = blockSize2 % 0x4u;
491 sum += ((
q31_t) * px++ * *py--);
492 sum += ((
q31_t) * px++ * *py--);
493 sum += ((
q31_t) * px++ * *py--);
494 sum += ((
q31_t) * px++ * *py--);
507 sum += ((
q31_t) * px++ * *py--);
514 *pOut++ = (
q15_t) (sum >> 15);
544 sum += ((
q31_t) * px++ * *py--);
551 *pOut++ = (
q15_t) (sum >> 15);
581 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
585 pSrc2 = pIn2 + (srcBLen - 1u);
598 j = blockSize3 >> 2u;
600 while((j > 0u) && (blockSize3 > 0u))
606 k = blockSize3 >> 2u;
629 k = blockSize3 % 0x4u;
634 sum = __SMLAD(*px++, *py--, sum);
641 *pOut++ = (
q15_t) (sum >> 15);
658 while(blockSize3 > 0u)
670 sum = __SMLAD(*px++, *py--, sum);
677 *pOut++ = (
q15_t) (sum >> 15);
691 q31_t sum, acc0, acc1, acc2, acc3;
694 q15_t *pSrc1, *pSrc2;
695 q31_t x0, x1, x2, x3, c0;
696 uint32_t blockSize1, blockSize2, blockSize3, j, k, count, blkCnt;
702 if(srcALen >= srcBLen)
735 blockSize1 = srcBLen - 1u;
736 blockSize2 = srcALen - (srcBLen - 1u);
737 blockSize3 = blockSize1;
769 while((count < 4u) && (blockSize1 > 0u))
781 sum += ((
q31_t) * px++ * *py--);
788 *pOut++ = (
q15_t) (sum >> 15);
807 while(blockSize1 > 0u)
822 sum += ((
q31_t) * px++ * *py--);
823 sum += ((
q31_t) * px++ * *py--);
824 sum += ((
q31_t) * px++ * *py--);
825 sum += ((
q31_t) * px++ * *py--);
838 sum += ((
q31_t) * px++ * *py--);
845 *pOut++ = (
q15_t) (sum >> 15);
848 py = pIn2 + (count - 1u);
872 pSrc2 = pIn2 + (srcBLen - 1u);
889 blkCnt = blockSize2 >> 2u;
905 #ifndef ARM_MATH_BIG_ENDIAN 907 x0 = __PKHBT(a, b, 16);
909 x1 = __PKHBT(b, a, 16);
913 x0 = __PKHBT(b, a, 16);
915 x1 = __PKHBT(a, b, 16);
932 #ifndef ARM_MATH_BIG_ENDIAN 934 c0 = __PKHBT(a, b, 16);
938 c0 = __PKHBT(b, a, 16);;
943 acc0 = __SMLADX(x0, c0, acc0);
946 acc1 = __SMLADX(x1, c0, acc1);
951 #ifndef ARM_MATH_BIG_ENDIAN 953 x2 = __PKHBT(a, b, 16);
955 x3 = __PKHBT(b, a, 16);
959 x2 = __PKHBT(b, a, 16);
961 x3 = __PKHBT(a, b, 16);
966 acc2 = __SMLADX(x2, c0, acc2);
969 acc3 = __SMLADX(x3, c0, acc3);
976 #ifndef ARM_MATH_BIG_ENDIAN 978 c0 = __PKHBT(a, b, 16);
982 c0 = __PKHBT(b, a, 16);;
987 acc0 = __SMLADX(x2, c0, acc0);
990 acc1 = __SMLADX(x3, c0, acc1);
996 #ifndef ARM_MATH_BIG_ENDIAN 998 x0 = __PKHBT(a, b, 16);
1000 x1 = __PKHBT(b, a, 16);
1004 x0 = __PKHBT(b, a, 16);
1006 x1 = __PKHBT(a, b, 16);
1013 acc2 = __SMLADX(x0, c0, acc2);
1016 acc3 = __SMLADX(x1, c0, acc3);
1032 #ifdef ARM_MATH_BIG_ENDIAN 1038 c0 = c0 & 0x0000FFFF;
1047 #ifndef ARM_MATH_BIG_ENDIAN 1049 x3 = __PKHBT(a, b, 16);
1053 x3 = __PKHBT(b, a, 16);;
1059 acc0 = __SMLAD(x0, c0, acc0);
1060 acc1 = __SMLAD(x1, c0, acc1);
1061 acc2 = __SMLADX(x1, c0, acc2);
1062 acc3 = __SMLADX(x3, c0, acc3);
1071 #ifndef ARM_MATH_BIG_ENDIAN 1073 c0 = __PKHBT(a, b, 16);
1077 c0 = __PKHBT(b, a, 16);;
1085 #ifndef ARM_MATH_BIG_ENDIAN 1087 x3 = __PKHBT(a, b, 16);
1089 x2 = __PKHBT(b, a, 16);
1093 x3 = __PKHBT(b, a, 16);
1095 x2 = __PKHBT(a, b, 16);
1101 acc0 = __SMLADX(x0, c0, acc0);
1102 acc1 = __SMLADX(x1, c0, acc1);
1103 acc2 = __SMLADX(x3, c0, acc2);
1104 acc3 = __SMLADX(x2, c0, acc3);
1113 #ifndef ARM_MATH_BIG_ENDIAN 1115 c0 = __PKHBT(a, b, 16);
1119 c0 = __PKHBT(b, a, 16);;
1127 #ifndef ARM_MATH_BIG_ENDIAN 1129 x3 = __PKHBT(a, b, 16);
1131 x2 = __PKHBT(b, a, 16);
1135 x3 = __PKHBT(b, a, 16);
1137 x2 = __PKHBT(a, b, 16);
1142 acc0 = __SMLADX(x0, c0, acc0);
1143 acc1 = __SMLADX(x1, c0, acc1);
1144 acc2 = __SMLADX(x3, c0, acc2);
1145 acc3 = __SMLADX(x2, c0, acc3);
1149 #ifdef ARM_MATH_BIG_ENDIAN 1154 c0 = c0 & 0x0000FFFF;
1161 #ifndef ARM_MATH_BIG_ENDIAN 1163 x3 = __PKHBT(a, b, 16);
1167 x3 = __PKHBT(b, a, 16);;
1174 acc0 = __SMLADX(x1, c0, acc0);
1175 acc1 = __SMLAD(x2, c0, acc1);
1176 acc2 = __SMLADX(x2, c0, acc2);
1177 acc3 = __SMLADX(x3, c0, acc3);
1181 *pOut++ = (
q15_t)(acc0 >> 15);
1182 *pOut++ = (
q15_t)(acc1 >> 15);
1183 *pOut++ = (
q15_t)(acc2 >> 15);
1184 *pOut++ = (
q15_t)(acc3 >> 15);
1199 blkCnt = blockSize2 % 0x4u;
1214 sum += ((
q31_t) * px++ * *py--);
1215 sum += ((
q31_t) * px++ * *py--);
1216 sum += ((
q31_t) * px++ * *py--);
1217 sum += ((
q31_t) * px++ * *py--);
1230 sum += ((
q31_t) * px++ * *py--);
1237 *pOut++ = (
q15_t) (sum >> 15);
1254 blkCnt = blockSize2;
1267 sum += ((
q31_t) * px++ * *py--);
1274 *pOut++ = (
q15_t) (sum >> 15);
1304 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
1308 pSrc2 = pIn2 + (srcBLen - 1u);
1321 j = blockSize3 >> 2u;
1323 while((j > 0u) && (blockSize3 > 0u))
1329 k = blockSize3 >> 2u;
1337 sum += ((
q31_t) * px++ * *py--);
1338 sum += ((
q31_t) * px++ * *py--);
1339 sum += ((
q31_t) * px++ * *py--);
1340 sum += ((
q31_t) * px++ * *py--);
1347 k = blockSize3 % 0x4u;
1352 sum += ((
q31_t) * px++ * *py--);
1359 *pOut++ = (
q15_t) (sum >> 15);
1376 while(blockSize3 > 0u)
1388 sum += ((
q31_t) * px++ * *py--);
1395 *pOut++ = (
q15_t) (sum >> 15);
void arm_conv_fast_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst)
Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
Definition to read/write two 16-bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.