76 #ifndef UNALIGNED_SUPPORT_DISABLE 81 q31_t sum, acc0, acc1, acc2, acc3;
85 q31_t x0, x1, x2, x3, c0;
86 uint32_t j, k, count, check, blkCnt;
87 int32_t blockSize1, blockSize2, blockSize3;
91 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
102 if(srcALen >=srcBLen)
126 check = firstIndex + numPoints;
127 blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
128 blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
129 blockSize1 = (((int32_t) srcBLen - 1) - (int32_t) firstIndex);
130 blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
131 (int32_t) numPoints) : 0;
132 blockSize2 = (int32_t) check - ((blockSize3 + blockSize1) +
133 (int32_t) firstIndex);
134 blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
147 pOut = pDst + firstIndex;
163 count = 1u + firstIndex;
169 pSrc2 = pIn2 + firstIndex;
181 while((count < 4u) && (blockSize1 > 0))
193 sum = __SMLAD(*px++, *py--, sum);
200 *pOut++ = (
q15_t) (sum >> 15);
219 while(blockSize1 > 0)
252 sum = __SMLAD(*px++, *py--, sum);
259 *pOut++ = (
q15_t) (sum >> 15);
283 if((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
285 px = pIn1 + firstIndex - srcBLen + 1;
293 pSrc2 = pIn2 + (srcBLen - 1u);
310 blkCnt = ((uint32_t) blockSize2 >> 2u);
342 acc0 = __SMLADX(x0, c0, acc0);
345 acc1 = __SMLADX(x1, c0, acc1);
354 acc2 = __SMLADX(x2, c0, acc2);
357 acc3 = __SMLADX(x3, c0, acc3);
363 acc0 = __SMLADX(x2, c0, acc0);
366 acc1 = __SMLADX(x3, c0, acc1);
376 acc2 = __SMLADX(x0, c0, acc2);
379 acc3 = __SMLADX(x1, c0, acc3);
394 #ifdef ARM_MATH_BIG_ENDIAN 400 c0 = c0 & 0x0000FFFF;
409 acc0 = __SMLAD(x0, c0, acc0);
410 acc1 = __SMLAD(x1, c0, acc1);
411 acc2 = __SMLADX(x1, c0, acc2);
412 acc3 = __SMLADX(x3, c0, acc3);
428 acc0 = __SMLADX(x0, c0, acc0);
429 acc1 = __SMLADX(x1, c0, acc1);
430 acc2 = __SMLADX(x3, c0, acc2);
431 acc3 = __SMLADX(x2, c0, acc3);
446 acc0 = __SMLADX(x0, c0, acc0);
447 acc1 = __SMLADX(x1, c0, acc1);
448 acc2 = __SMLADX(x3, c0, acc2);
449 acc3 = __SMLADX(x2, c0, acc3);
452 #ifdef ARM_MATH_BIG_ENDIAN 457 c0 = c0 & 0x0000FFFF;
465 acc0 = __SMLADX(x1, c0, acc0);
466 acc1 = __SMLAD(x2, c0, acc1);
467 acc2 = __SMLADX(x2, c0, acc2);
468 acc3 = __SMLADX(x3, c0, acc3);
472 #ifndef ARM_MATH_BIG_ENDIAN 474 *
__SIMD32(pOut)++ = __PKHBT(acc0 >> 15, acc1 >> 15, 16);
475 *
__SIMD32(pOut)++ = __PKHBT(acc2 >> 15, acc3 >> 15, 16);
479 *
__SIMD32(pOut)++ = __PKHBT(acc1 >> 15, acc0 >> 15, 16);
480 *
__SIMD32(pOut)++ = __PKHBT(acc3 >> 15, acc2 >> 15, 16);
497 blkCnt = (uint32_t) blockSize2 % 0x4u;
512 sum += ((
q31_t) * px++ * *py--);
513 sum += ((
q31_t) * px++ * *py--);
514 sum += ((
q31_t) * px++ * *py--);
515 sum += ((
q31_t) * px++ * *py--);
528 sum += ((
q31_t) * px++ * *py--);
535 *pOut++ = (
q15_t) (sum >> 15);
552 blkCnt = (uint32_t) blockSize2;
565 sum += ((
q31_t) * px++ * *py--);
572 *pOut++ = (
q15_t) (sum >> 15);
600 count = srcBLen - 1u;
603 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
607 pSrc2 = pIn2 + (srcBLen - 1u);
622 while((j > 0u) && (blockSize3 > 0))
656 sum = __SMLAD(*px++, *py--, sum);
663 *pOut++ = (
q15_t) (sum >> 15);
683 while(blockSize3 > 0)
695 sum = __SMLAD(*px++, *py--, sum);
702 *pOut++ = (
q15_t) (sum >> 15);
727 q31_t sum, acc0, acc1, acc2, acc3;
730 q15_t *pSrc1, *pSrc2;
731 q31_t x0, x1, x2, x3, c0;
732 uint32_t j, k, count, check, blkCnt;
733 int32_t blockSize1, blockSize2, blockSize3;
738 if((firstIndex + numPoints) > ((srcALen + (srcBLen - 1u))))
749 if(srcALen >=srcBLen)
773 check = firstIndex + numPoints;
774 blockSize3 = ((int32_t)check > (int32_t)srcALen) ? (int32_t)check - (int32_t)srcALen : 0;
775 blockSize3 = ((int32_t)firstIndex > (int32_t)srcALen - 1) ? blockSize3 - (int32_t)firstIndex + (int32_t)srcALen : blockSize3;
776 blockSize1 = ((int32_t) srcBLen - 1) - (int32_t) firstIndex;
777 blockSize1 = (blockSize1 > 0) ? ((check > (srcBLen - 1u)) ? blockSize1 :
778 (int32_t) numPoints) : 0;
779 blockSize2 = ((int32_t) check - blockSize3) -
780 (blockSize1 + (int32_t) firstIndex);
781 blockSize2 = (blockSize2 > 0) ? blockSize2 : 0;
794 pOut = pDst + firstIndex;
810 count = 1u + firstIndex;
816 pSrc2 = pIn2 + firstIndex;
828 while((count < 4u) && (blockSize1 > 0))
840 sum += ((
q31_t) * px++ * *py--);
847 *pOut++ = (
q15_t) (sum >> 15);
866 while(blockSize1 > 0)
881 sum += ((
q31_t) * px++ * *py--);
882 sum += ((
q31_t) * px++ * *py--);
883 sum += ((
q31_t) * px++ * *py--);
884 sum += ((
q31_t) * px++ * *py--);
897 sum += ((
q31_t) * px++ * *py--);
904 *pOut++ = (
q15_t) (sum >> 15);
928 if((int32_t)firstIndex - (int32_t)srcBLen + 1 > 0)
930 px = pIn1 + firstIndex - srcBLen + 1;
938 pSrc2 = pIn2 + (srcBLen - 1u);
955 blkCnt = ((uint32_t) blockSize2 >> 2u);
971 #ifndef ARM_MATH_BIG_ENDIAN 973 x0 = __PKHBT(a, b, 16);
975 x1 = __PKHBT(b, a, 16);
979 x0 = __PKHBT(b, a, 16);
981 x1 = __PKHBT(a, b, 16);
998 #ifndef ARM_MATH_BIG_ENDIAN 1000 c0 = __PKHBT(a, b, 16);
1004 c0 = __PKHBT(b, a, 16);;
1009 acc0 = __SMLADX(x0, c0, acc0);
1012 acc1 = __SMLADX(x1, c0, acc1);
1017 #ifndef ARM_MATH_BIG_ENDIAN 1019 x2 = __PKHBT(a, b, 16);
1021 x3 = __PKHBT(b, a, 16);
1025 x2 = __PKHBT(b, a, 16);
1027 x3 = __PKHBT(a, b, 16);
1032 acc2 = __SMLADX(x2, c0, acc2);
1035 acc3 = __SMLADX(x3, c0, acc3);
1042 #ifndef ARM_MATH_BIG_ENDIAN 1044 c0 = __PKHBT(a, b, 16);
1048 c0 = __PKHBT(b, a, 16);;
1053 acc0 = __SMLADX(x2, c0, acc0);
1056 acc1 = __SMLADX(x3, c0, acc1);
1062 #ifndef ARM_MATH_BIG_ENDIAN 1064 x0 = __PKHBT(a, b, 16);
1066 x1 = __PKHBT(b, a, 16);
1070 x0 = __PKHBT(b, a, 16);
1072 x1 = __PKHBT(a, b, 16);
1079 acc2 = __SMLADX(x0, c0, acc2);
1082 acc3 = __SMLADX(x1, c0, acc3);
1098 #ifdef ARM_MATH_BIG_ENDIAN 1104 c0 = c0 & 0x0000FFFF;
1113 #ifndef ARM_MATH_BIG_ENDIAN 1115 x3 = __PKHBT(a, b, 16);
1119 x3 = __PKHBT(b, a, 16);;
1125 acc0 = __SMLAD(x0, c0, acc0);
1126 acc1 = __SMLAD(x1, c0, acc1);
1127 acc2 = __SMLADX(x1, c0, acc2);
1128 acc3 = __SMLADX(x3, c0, acc3);
1137 #ifndef ARM_MATH_BIG_ENDIAN 1139 c0 = __PKHBT(a, b, 16);
1143 c0 = __PKHBT(b, a, 16);;
1151 #ifndef ARM_MATH_BIG_ENDIAN 1153 x3 = __PKHBT(a, b, 16);
1155 x2 = __PKHBT(b, a, 16);
1159 x3 = __PKHBT(b, a, 16);
1161 x2 = __PKHBT(a, b, 16);
1167 acc0 = __SMLADX(x0, c0, acc0);
1168 acc1 = __SMLADX(x1, c0, acc1);
1169 acc2 = __SMLADX(x3, c0, acc2);
1170 acc3 = __SMLADX(x2, c0, acc3);
1179 #ifndef ARM_MATH_BIG_ENDIAN 1181 c0 = __PKHBT(a, b, 16);
1185 c0 = __PKHBT(b, a, 16);;
1193 #ifndef ARM_MATH_BIG_ENDIAN 1195 x3 = __PKHBT(a, b, 16);
1197 x2 = __PKHBT(b, a, 16);
1201 x3 = __PKHBT(b, a, 16);
1203 x2 = __PKHBT(a, b, 16);
1208 acc0 = __SMLADX(x0, c0, acc0);
1209 acc1 = __SMLADX(x1, c0, acc1);
1210 acc2 = __SMLADX(x3, c0, acc2);
1211 acc3 = __SMLADX(x2, c0, acc3);
1215 #ifdef ARM_MATH_BIG_ENDIAN 1220 c0 = c0 & 0x0000FFFF;
1227 #ifndef ARM_MATH_BIG_ENDIAN 1229 x3 = __PKHBT(a, b, 16);
1233 x3 = __PKHBT(b, a, 16);;
1240 acc0 = __SMLADX(x1, c0, acc0);
1241 acc1 = __SMLAD(x2, c0, acc1);
1242 acc2 = __SMLADX(x2, c0, acc2);
1243 acc3 = __SMLADX(x3, c0, acc3);
1247 *pOut++ = (
q15_t)(acc0 >> 15);
1248 *pOut++ = (
q15_t)(acc1 >> 15);
1249 *pOut++ = (
q15_t)(acc2 >> 15);
1250 *pOut++ = (
q15_t)(acc3 >> 15);
1265 blkCnt = (uint32_t) blockSize2 % 0x4u;
1280 sum += ((
q31_t) * px++ * *py--);
1281 sum += ((
q31_t) * px++ * *py--);
1282 sum += ((
q31_t) * px++ * *py--);
1283 sum += ((
q31_t) * px++ * *py--);
1296 sum += ((
q31_t) * px++ * *py--);
1303 *pOut++ = (
q15_t) (sum >> 15);
1320 blkCnt = (uint32_t) blockSize2;
1333 sum += ((
q31_t) * px++ * *py--);
1340 *pOut++ = (
q15_t) (sum >> 15);
1368 count = srcBLen - 1u;
1371 pSrc1 = (pIn1 + srcALen) - (srcBLen - 1u);
1375 pSrc2 = pIn2 + (srcBLen - 1u);
1390 while((j > 0u) && (blockSize3 > 0))
1405 sum += ((
q31_t) * px++ * *py--);
1406 sum += ((
q31_t) * px++ * *py--);
1407 sum += ((
q31_t) * px++ * *py--);
1408 sum += ((
q31_t) * px++ * *py--);
1421 sum += ((
q31_t) * px++ * *py--);
1428 *pOut++ = (
q15_t) (sum >> 15);
1448 while(blockSize3 > 0)
1460 sum += ((
q31_t) * px++ * *py--);
1467 *pOut++ = (
q15_t) (sum >> 15);
arm_status arm_conv_partial_fast_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB, uint32_t srcBLen, q15_t *pDst, uint32_t firstIndex, uint32_t numPoints)
Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
int16_t q15_t
16-bit fractional data type in 1.15 format.
#define __SIMD32(addr)
Definition to read/write two 16-bit values.
#define _SIMD32_OFFSET(addr)
int32_t q31_t
32-bit fractional data type in 1.31 format.
arm_status
Error status returned by some functions in the library.