Mali OpenCL SDK v1.1.0
 All Classes Files Functions Variables Macros Pages
sgemm.cl
Go to the documentation of this file.
/*
 * This confidential and proprietary software may be used only as
 * authorised by a licensing agreement from ARM Limited
 * (C) COPYRIGHT 2013 ARM Limited
 * ALL RIGHTS RESERVED
 * The entire notice above must be reproduced on all authorised
 * copies and copies may only be made to the extent permitted
 * by a licensing agreement from ARM Limited.
 */
10 
/**
 * Single-precision general matrix multiply on square matrices:
 *     matrixC = alpha * (matrixA * matrixB) + beta * matrixC
 *
 * Each work-item produces one element of matrixC. Global ID dimension 1
 * selects the row (i) and dimension 0 selects the column (j). All three
 * matrices are indexed as matrixOrder x matrixOrder arrays with a row
 * stride of matrixOrder elements.
 *
 * Work-items whose IDs fall outside the matrix return without touching
 * memory, so the global work size may be padded. Values of matrixOrder
 * that are not a multiple of 4 are handled by a scalar remainder loop.
 *
 * @param matrixA      First input matrix (only read by this kernel).
 * @param matrixB      Second input matrix (only read by this kernel).
 * @param matrixC      Input/output matrix; read once and overwritten.
 * @param matrixOrder  Number of rows (and columns) of each matrix.
 * @param alpha        Scale factor applied to the product matrixA * matrixB.
 * @param beta         Scale factor applied to the previous contents of matrixC.
 */
__kernel void sgemm(__global float* restrict matrixA,
                    __global float* restrict const matrixB,
                    __global float* restrict matrixC,
                    const uint matrixOrder,
                    const float alpha,
                    const float beta)
{
    /* [Kernel size] */
    const size_t i = get_global_id(1);
    const size_t j = get_global_id(0);
    float4 sum = (float4)0.0f;
    float4 matrixBColumn;
    /* [Kernel size] */

    /* Ignore work-items that lie outside the matrix (padded NDRange). */
    if (i >= matrixOrder || j >= matrixOrder)
    {
        return;
    }

    /* Move to a specific row in matrixA. */
    __global const float* rowA = matrixA + i * matrixOrder;

    /* Move to a specific column in matrixB. */
    size_t bOffset = j;

    /* Largest multiple of 4 that does not exceed matrixOrder. */
    const uint vectorEnd = matrixOrder - (matrixOrder % 4u);
    uint k = 0;

    /* [Load column] */
    /*
     * Load 4 values from a column of matrixB and 4 values from a row of matrixA,
     * then multiply them together. Repeat until all complete groups of 4 have been
     * processed. Only the sum is needed, so each product is accumulated per lane.
     */
    for (; k < vectorEnd; k += 4)
    {
        matrixBColumn.x = matrixB[bOffset];
        bOffset += matrixOrder;

        matrixBColumn.y = matrixB[bOffset];
        bOffset += matrixOrder;

        matrixBColumn.z = matrixB[bOffset];
        bOffset += matrixOrder;

        matrixBColumn.w = matrixB[bOffset];
        bOffset += matrixOrder;
        /* [Load column] */

        /* [Calculation] */
        sum += vload4(0, rowA + k) * matrixBColumn;
    }
    /* [Calculation] */

    /*
     * Scalar remainder for the last (matrixOrder % 4) elements. It runs zero times
     * when matrixOrder is a multiple of 4, leaving the vector result untouched.
     */
    for (; k < matrixOrder; ++k)
    {
        sum.x += rowA[k] * matrixB[bOffset];
        bOffset += matrixOrder;
    }

    /* [Store] */
    /*
     * Sum the 4 lanes to get the single output of multiplying a row of matrixA by a
     * column of matrixB, then carry out the final calculation.
     */
    const size_t cIndex = i * matrixOrder + j;
    matrixC[cIndex] = alpha * (sum.x + sum.y + sum.z + sum.w) + beta * matrixC[cIndex];
    /* [Store] */
}