Mali OpenCL SDK v1.1.0
 All Classes Files Functions Variables Macros Pages
sgemm.cpp
Go to the documentation of this file.
1 /*
2  * This confidential and proprietary software may be used only as
3  * authorised by a licensing agreement from ARM Limited
4  * (C) COPYRIGHT 2013 ARM Limited
5  * ALL RIGHTS RESERVED
6  * The entire notice above must be reproduced on all authorised
7  * copies and copies may only be made to the extent permitted
8  * by a licensing agreement from ARM Limited.
9  */
10 
11 #include "common.h"
12 #include "image.h"
13 
14 #include <CL/cl.h>
15 #include <iostream>
16 #include <fstream>
17 #include <sstream>
18 #include <cstddef>
19 #include <cmath>
20 #include <cstdlib>
21 
22 using namespace std;
23 
/**
 * \brief Fill three square matrices with pseudo-random values in the range [-1, 1].
 *
 * Elements are visited in ascending linear order (index = row * matrixOrder + column).
 * For each element one value is drawn with rand() for matrixA, then one for matrixB,
 * then one for matrixC, so the result is reproducible for a given srand() seed.
 *
 * \param[in]  matrixOrder Number of rows (and columns) of each matrix. Values <= 0 leave the buffers untouched.
 * \param[out] matrixA     Buffer with room for at least matrixOrder * matrixOrder floats.
 * \param[out] matrixB     Buffer with room for at least matrixOrder * matrixOrder floats.
 * \param[out] matrixC     Buffer with room for at least matrixOrder * matrixOrder floats.
 */
void sgemmInitialize (int matrixOrder, float* matrixA, float* matrixB, float * matrixC)
{
    /* Nothing to fill for an empty (or negative) order. */
    if (matrixOrder <= 0)
    {
        return;
    }

    /*
     * Do the index arithmetic in size_t: computing row * matrixOrder + column in int
     * overflows (undefined behaviour) once matrixOrder exceeds 46340.
     */
    const std::size_t order = static_cast<std::size_t>(matrixOrder);
    const std::size_t elementCount = order * order;

    for (std::size_t index = 0; index < elementCount; index++)
    {
        /* rand() / RAND_MAX lies in [0, 1]; scale and shift it to [-1, 1]. */
        matrixA[index] = std::rand() / static_cast<float>(RAND_MAX) * 2 - 1;
        matrixB[index] = std::rand() / static_cast<float>(RAND_MAX) * 2 - 1;
        matrixC[index] = std::rand() / static_cast<float>(RAND_MAX) * 2 - 1;
    }
}
52 
60 int main(void)
61 {
62  cl_context context = 0;
63  cl_command_queue commandQueue = 0;
64  cl_program program = 0;
65  cl_device_id device = 0;
66  cl_kernel kernel = 0;
67  const unsigned int numberOfMemoryObjects = 3;
68  cl_mem memoryObjects[numberOfMemoryObjects] = {0, 0, 0};
69  cl_int errorNumber;
70 
71  if (!createContext(&context))
72  {
73  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
74  cerr << "Failed to create an OpenCL context. " << __FILE__ << ":"<< __LINE__ << endl;
75  return 1;
76  }
77 
78  if (!createCommandQueue(context, &commandQueue, &device))
79  {
80  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
81  cerr << "Failed to create the OpenCL command queue. " << __FILE__ << ":"<< __LINE__ << endl;
82  return 1;
83  }
84 
85  if (!createProgram(context, device, "assets/sgemm.cl", &program))
86  {
87  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
88  cerr << "Failed to create OpenCL program." << __FILE__ << ":"<< __LINE__ << endl;
89  return 1;
90  }
91 
92  kernel = clCreateKernel(program, "sgemm", &errorNumber);
93  if (!checkSuccess(errorNumber))
94  {
95  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
96  cerr << "Failed to create OpenCL kernel. " << __FILE__ << ":"<< __LINE__ << endl;
97  return 1;
98  }
99 
100  /* Kernel variables. */
101  unsigned int matrixOrder = 2048;
102  float alpha = 1;
103  float beta = 0.1;
104 
105  /* Create the matrices. */
106  const size_t matrixSize = matrixOrder * matrixOrder;
107 
108  /* As all the matrices have the same size, the buffer size is common. */
109  size_t bufferSize = matrixSize * sizeof(float);
110 
111  /* Create buffers for the matrices used in the kernel. */
112  bool createMemoryObjectsSuccess = true;
113  memoryObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
114  createMemoryObjectsSuccess &= checkSuccess(errorNumber);
115  memoryObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
116  createMemoryObjectsSuccess &= checkSuccess(errorNumber);
117  memoryObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
118  createMemoryObjectsSuccess &= checkSuccess(errorNumber);
119  if (!createMemoryObjectsSuccess)
120  {
121  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
122  cerr << "Failed to create OpenCL buffers. " << __FILE__ << ":"<< __LINE__ << endl;
123  return 1;
124  }
125 
126  /* Map the input memory objects to a host side pointers. */
127  bool mapMemoryObjectsSuccess = true;
128  cl_float* matrixA = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[0], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
129  mapMemoryObjectsSuccess &= checkSuccess(errorNumber);
130  cl_float* matrixB = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[1], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
131  mapMemoryObjectsSuccess &= checkSuccess(errorNumber);
132  cl_float* matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[2], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
133  mapMemoryObjectsSuccess &= checkSuccess(errorNumber);
134  if (!mapMemoryObjectsSuccess)
135  {
136  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
137  cerr << "Mapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
138  return 1;
139  }
140 
141  /* Fill the matrices with random data. */
142  sgemmInitialize(matrixOrder, matrixA, matrixB, matrixC);
143 
144  /* Unmap the memory so we can pass it to the kernel. */
145  bool unmapMemoryObjectsSuccess = true;
146  unmapMemoryObjectsSuccess &= checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[0], matrixA, 0, NULL, NULL));
147  unmapMemoryObjectsSuccess &= checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[1], matrixB, 0, NULL, NULL));
148  unmapMemoryObjectsSuccess &= checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[2], matrixC, 0, NULL, NULL));
149  if (!unmapMemoryObjectsSuccess)
150  {
151  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
152  cerr << "Unmapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
153  return 1;
154  }
155 
156  /* Setup kernel arguments. */
157  bool setKernelArgumentsSuccess = true;
158  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 0, sizeof(cl_mem), &memoryObjects[0]));
159  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoryObjects[1]));
160  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 2, sizeof(cl_mem), &memoryObjects[2]));
161  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 3, sizeof(cl_uint), &matrixOrder));
162  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 4, sizeof(cl_float), &alpha));
163  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 5, sizeof(cl_float), &beta));
164  if (!createMemoryObjectsSuccess)
165  {
166  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
167  cerr << "Failed setting OpenCL kernel arguments. " << __FILE__ << ":"<< __LINE__ << endl;
168  return 1;
169  }
170 
171  /* An event to associate with the Kernel. Allows us to retrieve profiling information later. */
172  cl_event event = 0;
173 
174  /* [Kernel size] */
175  /*
176  * Each kernel outputs one element in matrixC,
177  * therefore the total number of work items must be the number of elements (matrixOrder * matrixOrder).
178  * To accomplish this we use a global worksize split into 2 dimensions both of matrixOrder size.
179  */
180  size_t globalWorksize[2] = {matrixOrder, matrixOrder};
181  /* [Kernel size] */
182 
183  /* Enqueue the kernel */
184  if (!checkSuccess(clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalWorksize, NULL, 0, NULL, &event)))
185  {
186  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
187  cerr << "Failed enqueuing the kernel. " << __FILE__ << ":"<< __LINE__ << endl;
188  return 1;
189  }
190 
191  /* Wait for kernel execution completion */
192  if (!checkSuccess(clFinish(commandQueue)))
193  {
194  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
195  cerr << "Failed waiting for kernel execution to finish. " << __FILE__ << ":"<< __LINE__ << endl;
196  return 1;
197  }
198 
199  /* Print the profiling information for the event. */
200  printProfilingInfo(event);
201  /* Release the event object. */
202  if (!checkSuccess(clReleaseEvent(event)))
203  {
204  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
205  cerr << "Failed releasing the event object. " << __FILE__ << ":"<< __LINE__ << endl;
206  return 1;
207  }
208 
209  /* Map the output to a host side pointer. */
210  matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[2], CL_TRUE, CL_MAP_READ, 0, bufferSize, 0, NULL, NULL, &errorNumber);
211  if (!checkSuccess(errorNumber))
212  {
213  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
214  cerr << "Mapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
215  return 1;
216  }
217 
218  /* Do something with the output (matrixC) here. */
219 
220  /* Unmap the output. */
221  if (!checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[2], matrixC, 0, NULL, NULL)))
222  {
223  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
224  cerr << "Unmapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
225  return 1;
226  }
227 
228  /* Release OpenCL objects. */
229  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
230 
231  return 0;
232 }