STM32F769IDiscovery  1.00
uDANTE Audio Networking with STM32F7 DISCO board
arm_mat_cmplx_mult_f32.c
Go to the documentation of this file.
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date: 19. March 2015
5 * $Revision: V.1.4.5
6 *
7 * Project: CMSIS DSP Library
8 * Title: arm_mat_cmplx_mult_f32.c
9 *
10 * Description: Floating-point complex matrix multiplication.
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * - Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * - Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in
21 * the documentation and/or other materials provided with the
22 * distribution.
23 * - Neither the name of ARM LIMITED nor the names of its contributors
24 * may be used to endorse or promote products derived from this
25 * software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 * -------------------------------------------------------------------- */
40 #include "arm_math.h"
41 
74  const arm_matrix_instance_f32 * pSrcA,
75  const arm_matrix_instance_f32 * pSrcB,
77 {
78  float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */
79  float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */
80  float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */
81  float32_t *pOut = pDst->pData; /* output data matrix pointer */
82  float32_t *px; /* Temporary output data matrix pointer */
83  uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
84  uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
85  uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
86  float32_t sumReal1, sumImag1; /* accumulator */
87  float32_t a0, b0, c0, d0;
88  float32_t a1, b1, c1, d1;
89  float32_t sumReal2, sumImag2; /* accumulator */
90 
91 
92  /* Run the below code for Cortex-M4 and Cortex-M3 */
93 
94  uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */
95  arm_status status; /* status of matrix multiplication */
96 
97 #ifdef ARM_MATH_MATRIX_CHECK
98 
99 
100  /* Check for matrix mismatch condition */
101  if((pSrcA->numCols != pSrcB->numRows) ||
102  (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
103  {
104 
105  /* Set status as ARM_MATH_SIZE_MISMATCH */
106  status = ARM_MATH_SIZE_MISMATCH;
107  }
108  else
109 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
110 
111  {
112  /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
113  /* row loop */
114  do
115  {
116  /* Output pointer is set to starting address of the row being processed */
117  px = pOut + 2 * i;
118 
119  /* For every row wise process, the column loop counter is to be initiated */
120  col = numColsB;
121 
122  /* For every row wise process, the pIn2 pointer is set
123  ** to the starting address of the pSrcB data */
124  pIn2 = pSrcB->pData;
125 
126  j = 0u;
127 
128  /* column loop */
129  do
130  {
131  /* Set the variable sum, that acts as accumulator, to zero */
132  sumReal1 = 0.0f;
133  sumImag1 = 0.0f;
134 
135  sumReal2 = 0.0f;
136  sumImag2 = 0.0f;
137 
138  /* Initiate the pointer pIn1 to point to the starting address of the column being processed */
139  pIn1 = pInA;
140 
141  /* Apply loop unrolling and compute 4 MACs simultaneously. */
142  colCnt = numColsA >> 2;
143 
144  /* matrix multiplication */
145  while(colCnt > 0u)
146  {
147 
148  /* Reading real part of complex matrix A */
149  a0 = *pIn1;
150 
151  /* Reading real part of complex matrix B */
152  c0 = *pIn2;
153 
154  /* Reading imaginary part of complex matrix A */
155  b0 = *(pIn1 + 1u);
156 
157  /* Reading imaginary part of complex matrix B */
158  d0 = *(pIn2 + 1u);
159 
160  sumReal1 += a0 * c0;
161  sumImag1 += b0 * c0;
162 
163  pIn1 += 2u;
164  pIn2 += 2 * numColsB;
165 
166  sumReal2 -= b0 * d0;
167  sumImag2 += a0 * d0;
168 
169  /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
170 
171  a1 = *pIn1;
172  c1 = *pIn2;
173 
174  b1 = *(pIn1 + 1u);
175  d1 = *(pIn2 + 1u);
176 
177  sumReal1 += a1 * c1;
178  sumImag1 += b1 * c1;
179 
180  pIn1 += 2u;
181  pIn2 += 2 * numColsB;
182 
183  sumReal2 -= b1 * d1;
184  sumImag2 += a1 * d1;
185 
186  a0 = *pIn1;
187  c0 = *pIn2;
188 
189  b0 = *(pIn1 + 1u);
190  d0 = *(pIn2 + 1u);
191 
192  sumReal1 += a0 * c0;
193  sumImag1 += b0 * c0;
194 
195  pIn1 += 2u;
196  pIn2 += 2 * numColsB;
197 
198  sumReal2 -= b0 * d0;
199  sumImag2 += a0 * d0;
200 
201  /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
202 
203  a1 = *pIn1;
204  c1 = *pIn2;
205 
206  b1 = *(pIn1 + 1u);
207  d1 = *(pIn2 + 1u);
208 
209  sumReal1 += a1 * c1;
210  sumImag1 += b1 * c1;
211 
212  pIn1 += 2u;
213  pIn2 += 2 * numColsB;
214 
215  sumReal2 -= b1 * d1;
216  sumImag2 += a1 * d1;
217 
218  /* Decrement the loop count */
219  colCnt--;
220  }
221 
222  /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
223  ** No loop unrolling is used. */
224  colCnt = numColsA % 0x4u;
225 
226  while(colCnt > 0u)
227  {
228  /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
229  a1 = *pIn1;
230  c1 = *pIn2;
231 
232  b1 = *(pIn1 + 1u);
233  d1 = *(pIn2 + 1u);
234 
235  sumReal1 += a1 * c1;
236  sumImag1 += b1 * c1;
237 
238  pIn1 += 2u;
239  pIn2 += 2 * numColsB;
240 
241  sumReal2 -= b1 * d1;
242  sumImag2 += a1 * d1;
243 
244  /* Decrement the loop counter */
245  colCnt--;
246  }
247 
248  sumReal1 += sumReal2;
249  sumImag1 += sumImag2;
250 
251  /* Store the result in the destination buffer */
252  *px++ = sumReal1;
253  *px++ = sumImag1;
254 
255  /* Update the pointer pIn2 to point to the starting address of the next column */
256  j++;
257  pIn2 = pSrcB->pData + 2u * j;
258 
259  /* Decrement the column loop counter */
260  col--;
261 
262  } while(col > 0u);
263 
264  /* Update the pointer pInA to point to the starting address of the next row */
265  i = i + numColsB;
266  pInA = pInA + 2 * numColsA;
267 
268  /* Decrement the row loop counter */
269  row--;
270 
271  } while(row > 0u);
272 
273  /* Set status as ARM_MATH_SUCCESS */
274  status = ARM_MATH_SUCCESS;
275  }
276 
277  /* Return to application */
278  return (status);
279 }
280 
float float32_t
32-bit floating-point type definition.
Definition: arm_math.h:407
arm_status arm_mat_cmplx_mult_f32(const arm_matrix_instance_f32 *pSrcA, const arm_matrix_instance_f32 *pSrcB, arm_matrix_instance_f32 *pDst)
Floating-point Complex matrix multiplication.
Instance structure for the floating-point matrix structure.
Definition: arm_math.h:1369
arm_status
Error status returned by some functions in the library.
Definition: arm_math.h:373