STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_cmplx_mult_cmplx_q31.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_cmplx_mult_cmplx_q31.c
9 *
10 * Description: Q31 complex-by-complex multiplication
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 
41 #include "arm_math.h"
42 
68  q31_t * pSrcA, /* first input: interleaved real/imaginary values, two read per sample */
69  q31_t * pSrcB, /* second input, read with the same interleaved layout */
70  q31_t * pDst, /* output: real part then imaginary part of each product */
71  uint32_t numSamples) /* number of complex samples to multiply */
72 {
73  q31_t a, b, c, d; /* Current sample a + jb from pSrcA and c + jd from pSrcB */
74  uint32_t blkCnt; /* Iterations left in the loop currently running */
75  q31_t mul1, mul2, mul3, mul4; /* Scaled partial products a*c, b*d, a*d and b*c */
76  q31_t out1, out2; /* Real (mul1 - mul2) and imaginary (mul3 + mul4) results */
77 
78 #ifndef ARM_MATH_CM0_FAMILY
79 
80  /* Built when ARM_MATH_CM0_FAMILY is not defined (Cortex-M4 and Cortex-M3): main loop unrolled by 4 */
81 
82  /* Number of complete groups of 4 complex samples */
83  blkCnt = numSamples >> 2u;
84 
85  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
86  ** A second loop below computes the remaining 1 to 3 samples. */
87  while(blkCnt > 0u)
88  {
89  /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
90  /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
91  a = *pSrcA++; /* first sample of the group */
92  b = *pSrcA++;
93  c = *pSrcB++;
94  d = *pSrcB++;
95 
96  mul1 = (q31_t) (((q63_t) a * c) >> 32); /* widen to 64 bits, multiply, shift right by 32 */
97  mul2 = (q31_t) (((q63_t) b * d) >> 32);
98  mul3 = (q31_t) (((q63_t) a * d) >> 32);
99  mul4 = (q31_t) (((q63_t) b * c) >> 32);
100 
101  mul1 = (mul1 >> 1); /* one more right shift: each term is at most 2^29 in magnitude, so the sum and difference below fit in q31_t */
102  mul2 = (mul2 >> 1);
103  mul3 = (mul3 >> 1);
104  mul4 = (mul4 >> 1);
105 
106  out1 = mul1 - mul2; /* real part: a*c - b*d */
107  out2 = mul3 + mul4; /* imaginary part: a*d + b*c */
108 
109  /* store the real result in 3.29 format in the destination buffer. */
110  *pDst++ = out1;
111  /* store the imag result in 3.29 format in the destination buffer. */
112  *pDst++ = out2;
113 
114  a = *pSrcA++; /* second sample of the group */
115  b = *pSrcA++;
116  c = *pSrcB++;
117  d = *pSrcB++;
118 
119  mul1 = (q31_t) (((q63_t) a * c) >> 32);
120  mul2 = (q31_t) (((q63_t) b * d) >> 32);
121  mul3 = (q31_t) (((q63_t) a * d) >> 32);
122  mul4 = (q31_t) (((q63_t) b * c) >> 32);
123 
124  mul1 = (mul1 >> 1);
125  mul2 = (mul2 >> 1);
126  mul3 = (mul3 >> 1);
127  mul4 = (mul4 >> 1);
128 
129  out1 = mul1 - mul2;
130  out2 = mul3 + mul4;
131 
132  /* store the real result in 3.29 format in the destination buffer. */
133  *pDst++ = out1;
134  /* store the imag result in 3.29 format in the destination buffer. */
135  *pDst++ = out2;
136 
137  a = *pSrcA++; /* third sample of the group */
138  b = *pSrcA++;
139  c = *pSrcB++;
140  d = *pSrcB++;
141 
142  mul1 = (q31_t) (((q63_t) a * c) >> 32);
143  mul2 = (q31_t) (((q63_t) b * d) >> 32);
144  mul3 = (q31_t) (((q63_t) a * d) >> 32);
145  mul4 = (q31_t) (((q63_t) b * c) >> 32);
146 
147  mul1 = (mul1 >> 1);
148  mul2 = (mul2 >> 1);
149  mul3 = (mul3 >> 1);
150  mul4 = (mul4 >> 1);
151 
152  out1 = mul1 - mul2;
153  out2 = mul3 + mul4;
154 
155  /* store the real result in 3.29 format in the destination buffer. */
156  *pDst++ = out1;
157  /* store the imag result in 3.29 format in the destination buffer. */
158  *pDst++ = out2;
159 
160  a = *pSrcA++; /* fourth sample of the group */
161  b = *pSrcA++;
162  c = *pSrcB++;
163  d = *pSrcB++;
164 
165  mul1 = (q31_t) (((q63_t) a * c) >> 32);
166  mul2 = (q31_t) (((q63_t) b * d) >> 32);
167  mul3 = (q31_t) (((q63_t) a * d) >> 32);
168  mul4 = (q31_t) (((q63_t) b * c) >> 32);
169 
170  mul1 = (mul1 >> 1);
171  mul2 = (mul2 >> 1);
172  mul3 = (mul3 >> 1);
173  mul4 = (mul4 >> 1);
174 
175  out1 = mul1 - mul2;
176  out2 = mul3 + mul4;
177 
178  /* store the real result in 3.29 format in the destination buffer. */
179  *pDst++ = out1;
180  /* store the imag result in 3.29 format in the destination buffer. */
181  *pDst++ = out2;
182 
183  /* One group of 4 samples finished: decrement the loop counter */
184  blkCnt--;
185  }
186 
187  /* If numSamples is not a multiple of 4, compute the remaining 1 to 3 output samples here.
188  ** No loop unrolling is used. */
189  blkCnt = numSamples % 0x4u;
190 
191  while(blkCnt > 0u)
192  {
193  /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
194  /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
195  a = *pSrcA++;
196  b = *pSrcA++;
197  c = *pSrcB++;
198  d = *pSrcB++;
199 
200  mul1 = (q31_t) (((q63_t) a * c) >> 32); /* same scaling as in the unrolled loop above */
201  mul2 = (q31_t) (((q63_t) b * d) >> 32);
202  mul3 = (q31_t) (((q63_t) a * d) >> 32);
203  mul4 = (q31_t) (((q63_t) b * c) >> 32);
204 
205  mul1 = (mul1 >> 1);
206  mul2 = (mul2 >> 1);
207  mul3 = (mul3 >> 1);
208  mul4 = (mul4 >> 1);
209 
210  out1 = mul1 - mul2;
211  out2 = mul3 + mul4;
212 
213  /* store the real result in 3.29 format in the destination buffer. */
214  *pDst++ = out1;
215  /* store the imag result in 3.29 format in the destination buffer. */
216  *pDst++ = out2;
217 
218  /* One leftover sample finished: decrement the loop counter */
219  blkCnt--;
220  }
221 
222 #else
223 
224  /* Built when ARM_MATH_CM0_FAMILY is defined (Cortex-M0): main loop unrolled by 2 */
225 
226  /* Number of complete pairs of complex samples */
227  blkCnt = numSamples >> 1u;
228 
229  /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
230  ** A second loop below computes the remaining 1 sample. */
231  while(blkCnt > 0u)
232  {
233  /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
234  /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
235  a = *pSrcA++; /* first sample of the pair */
236  b = *pSrcA++;
237  c = *pSrcB++;
238  d = *pSrcB++;
239 
240  mul1 = (q31_t) (((q63_t) a * c) >> 32); /* widen to 64 bits, multiply, shift right by 32 */
241  mul2 = (q31_t) (((q63_t) b * d) >> 32);
242  mul3 = (q31_t) (((q63_t) a * d) >> 32);
243  mul4 = (q31_t) (((q63_t) b * c) >> 32);
244 
245  mul1 = (mul1 >> 1); /* one more right shift: each term is at most 2^29 in magnitude, so the sum and difference below fit in q31_t */
246  mul2 = (mul2 >> 1);
247  mul3 = (mul3 >> 1);
248  mul4 = (mul4 >> 1);
249 
250  out1 = mul1 - mul2; /* real part: a*c - b*d */
251  out2 = mul3 + mul4; /* imaginary part: a*d + b*c */
252 
253  /* store the real result in 3.29 format in the destination buffer. */
254  *pDst++ = out1;
255  /* store the imag result in 3.29 format in the destination buffer. */
256  *pDst++ = out2;
257 
258  a = *pSrcA++; /* second sample of the pair */
259  b = *pSrcA++;
260  c = *pSrcB++;
261  d = *pSrcB++;
262 
263  mul1 = (q31_t) (((q63_t) a * c) >> 32);
264  mul2 = (q31_t) (((q63_t) b * d) >> 32);
265  mul3 = (q31_t) (((q63_t) a * d) >> 32);
266  mul4 = (q31_t) (((q63_t) b * c) >> 32);
267 
268  mul1 = (mul1 >> 1);
269  mul2 = (mul2 >> 1);
270  mul3 = (mul3 >> 1);
271  mul4 = (mul4 >> 1);
272 
273  out1 = mul1 - mul2;
274  out2 = mul3 + mul4;
275 
276  /* store the real result in 3.29 format in the destination buffer. */
277  *pDst++ = out1;
278  /* store the imag result in 3.29 format in the destination buffer. */
279  *pDst++ = out2;
280 
281  /* One pair of samples finished: decrement the loop counter */
282  blkCnt--;
283  }
284 
285  /* If numSamples is not a multiple of 2, compute the one remaining output sample here.
286  ** No loop unrolling is used. */
287  blkCnt = numSamples % 0x2u;
288 
289  while(blkCnt > 0u)
290  {
291  /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
292  /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
293  a = *pSrcA++;
294  b = *pSrcA++;
295  c = *pSrcB++;
296  d = *pSrcB++;
297 
298  mul1 = (q31_t) (((q63_t) a * c) >> 32); /* same scaling as in the unrolled loop above */
299  mul2 = (q31_t) (((q63_t) b * d) >> 32);
300  mul3 = (q31_t) (((q63_t) a * d) >> 32);
301  mul4 = (q31_t) (((q63_t) b * c) >> 32);
302 
303  mul1 = (mul1 >> 1);
304  mul2 = (mul2 >> 1);
305  mul3 = (mul3 >> 1);
306  mul4 = (mul4 >> 1);
307 
308  out1 = mul1 - mul2;
309  out2 = mul3 + mul4;
310 
311  /* store the real result in 3.29 format in the destination buffer. */
312  *pDst++ = out1;
313  /* store the imag result in 3.29 format in the destination buffer. */
314  *pDst++ = out2;
315 
316  /* Leftover sample finished: decrement the loop counter */
317  blkCnt--;
318  }
319 
320 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
321 
322 }
323 
int64_t q63_t
64-bit fractional data type in 1.63 format.
Definition: arm_math.h:402
void arm_cmplx_mult_cmplx_q31(q31_t *pSrcA, q31_t *pSrcB, q31_t *pDst, uint32_t numSamples)
Q31 complex-by-complex multiplication.
int32_t q31_t
32-bit fractional data type in 1.31 format.
Definition: arm_math.h:397