94 q31_t sum, acc0, acc1, acc2, acc3;
95 q31_t x0, x1, x2, x3, c0;
96 uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3;
103 if(srcALen >= srcBLen)
112 outBlockSize = (2u * srcALen) - 1u;
118 j = outBlockSize - (srcALen + (srcBLen - 1u));
139 pOut = pDst + ((srcALen + srcBLen) - 2u);
155 blockSize1 = srcBLen - 1u;
156 blockSize2 = srcALen - (srcBLen - 1u);
157 blockSize3 = blockSize1;
177 pSrc1 = pIn2 + (srcBLen - 1u);
185 while(blockSize1 > 0u)
199 ((
q63_t) * px++ * (*py++))) >> 32);
202 ((
q63_t) * px++ * (*py++))) >> 32);
205 ((
q63_t) * px++ * (*py++))) >> 32);
208 ((
q63_t) * px++ * (*py++))) >> 32);
223 ((
q63_t) * px++ * (*py++))) >> 32);
274 blkCnt = blockSize2 >> 2u;
304 acc0 = (
q31_t) ((((
q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32);
306 acc1 = (
q31_t) ((((
q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32);
308 acc2 = (
q31_t) ((((
q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32);
310 acc3 = (
q31_t) ((((
q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32);
320 acc0 = (
q31_t) ((((
q63_t) acc0 << 32) + ((q63_t) x1 * c0)) >> 32);
322 acc1 = (
q31_t) ((((
q63_t) acc1 << 32) + ((q63_t) x2 * c0)) >> 32);
324 acc2 = (
q31_t) ((((
q63_t) acc2 << 32) + ((q63_t) x3 * c0)) >> 32);
326 acc3 = (
q31_t) ((((
q63_t) acc3 << 32) + ((q63_t) x0 * c0)) >> 32);
336 acc0 = (
q31_t) ((((
q63_t) acc0 << 32) + ((q63_t) x2 * c0)) >> 32);
338 acc1 = (
q31_t) ((((
q63_t) acc1 << 32) + ((q63_t) x3 * c0)) >> 32);
340 acc2 = (
q31_t) ((((
q63_t) acc2 << 32) + ((q63_t) x0 * c0)) >> 32);
342 acc3 = (
q31_t) ((((
q63_t) acc3 << 32) + ((q63_t) x1 * c0)) >> 32);
352 acc0 = (
q31_t) ((((
q63_t) acc0 << 32) + ((q63_t) x3 * c0)) >> 32);
354 acc1 = (
q31_t) ((((
q63_t) acc1 << 32) + ((q63_t) x0 * c0)) >> 32);
356 acc2 = (
q31_t) ((((
q63_t) acc2 << 32) + ((q63_t) x1 * c0)) >> 32);
358 acc3 = (
q31_t) ((((
q63_t) acc3 << 32) + ((q63_t) x2 * c0)) >> 32);
377 acc0 = (
q31_t) ((((
q63_t) acc0 << 32) + ((q63_t) x0 * c0)) >> 32);
379 acc1 = (
q31_t) ((((
q63_t) acc1 << 32) + ((q63_t) x1 * c0)) >> 32);
381 acc2 = (
q31_t) ((((
q63_t) acc2 << 32) + ((q63_t) x2 * c0)) >> 32);
383 acc3 = (
q31_t) ((((
q63_t) acc3 << 32) + ((q63_t) x3 * c0)) >> 32);
395 *pOut = (
q31_t) (acc0 << 1);
399 *pOut = (
q31_t) (acc1 << 1);
402 *pOut = (
q31_t) (acc2 << 1);
405 *pOut = (
q31_t) (acc3 << 1);
422 blkCnt = blockSize2 % 0x4u;
438 ((
q63_t) * px++ * (*py++))) >> 32);
440 ((
q63_t) * px++ * (*py++))) >> 32);
442 ((
q63_t) * px++ * (*py++))) >> 32);
444 ((
q63_t) * px++ * (*py++))) >> 32);
458 ((
q63_t) * px++ * (*py++))) >> 32);
499 ((
q63_t) * px++ * (*py++))) >> 32);
535 count = srcBLen - 1u;
538 pSrc1 = ((pIn1 + srcALen) - srcBLen) + 1u;
548 while(blockSize3 > 0u)
563 ((
q63_t) * px++ * (*py++))) >> 32);
566 ((
q63_t) * px++ * (*py++))) >> 32);
569 ((
q63_t) * px++ * (*py++))) >> 32);
572 ((
q63_t) * px++ * (*py++))) >> 32);
586 ((
q63_t) * px++ * (*py++))) >> 32);
int64_t q63_t
64-bit fractional data type in 1.63 format.
int32_t q31_t
32-bit fractional data type in 1.31 format.
void arm_correlate_fast_q31(q31_t *pSrcA, uint32_t srcALen, q31_t *pSrcB, uint32_t srcBLen, q31_t *pDst)
Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.