/**
 * Draw one pseudo-random value from rand(), scaled into the range [-1, 1].
 */
static float sgemmRandomValue(void)
{
    return rand() / (float) RAND_MAX * 2 - 1;
}

/**
 * Fill three square matrices with pseudo-random values in the range [-1, 1].
 *
 * Elements are visited in row-major order (index = i * matrixOrder + j) and,
 * for every element, one value is drawn for matrixA, then matrixB, then
 * matrixC, so the sequence consumed from rand() is A, B, C, A, B, C, ...
 *
 * @param matrixOrder Number of rows (and columns) of each matrix. A value
 *                    less than or equal to zero leaves all buffers untouched.
 * @param matrixA     Output buffer of at least matrixOrder * matrixOrder floats.
 * @param matrixB     Output buffer of at least matrixOrder * matrixOrder floats.
 * @param matrixC     Output buffer of at least matrixOrder * matrixOrder floats.
 */
void sgemmInitialize (int matrixOrder, float* matrixA, float* matrixB, float * matrixC)
{
    for (int i = 0; i < matrixOrder; i++)
    {
        for (int j = 0; j < matrixOrder; j++)
        {
            /* Compute the offset in size_t so large orders cannot overflow int arithmetic. */
            size_t index = (size_t) i * (size_t) matrixOrder + (size_t) j;

            /* Separate statements keep the A, B, C draw order well defined. */
            matrixA[index] = sgemmRandomValue();
            matrixB[index] = sgemmRandomValue();
            matrixC[index] = sgemmRandomValue();
        }
    }
}
/*
 * Host-side sequence that sets up OpenCL, creates and fills three buffers,
 * runs the "sgemm" kernel over a matrixOrder x matrixOrder range and maps the
 * result back for reading.
 *
 * NOTE(review): this excerpt is incomplete. The enclosing function header, the
 * declarations of kernel, errorNumber, event, alpha and beta, and the
 * conditions guarding most of the error paths below are not visible here.
 * The leading number on each statement looks like a line number carried over
 * from a source listing rather than code - confirm against the real file.
 */

/* OpenCL handles start at 0 so cleanUpOpenCL can be handed them at any point. */
62 cl_context context = 0;
63 cl_command_queue commandQueue = 0;
64 cl_program program = 0;
65 cl_device_id device = 0;
/* Three memory objects: indices 0 and 1 are created read-only, index 2 read-write (see the clCreateBuffer calls below). */
67 const unsigned int numberOfMemoryObjects = 3;
68 cl_mem memoryObjects[numberOfMemoryObjects] = {0, 0, 0};
/* Error path for context creation (the guarding condition is not visible in this excerpt). */
73 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
74 cerr <<
"Failed to create an OpenCL context. " << __FILE__ <<
":"<< __LINE__ << endl;
/* Error path for command queue creation (guarding condition not visible). */
80 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
81 cerr <<
"Failed to create the OpenCL command queue. " << __FILE__ <<
":"<< __LINE__ << endl;
/* Build the program from the kernel source file; on failure clean up and report. */
85 if (!
createProgram(context, device,
"assets/sgemm.cl", &program))
87 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
88 cerr <<
"Failed to create OpenCL program." << __FILE__ <<
":"<< __LINE__ << endl;
/* Look up the kernel named "sgemm" in the program; the status is written to errorNumber. */
92 kernel = clCreateKernel(program,
"sgemm", &errorNumber);
/* Error path for kernel creation (the check of errorNumber is not visible). */
95 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
96 cerr <<
"Failed to create OpenCL kernel. " << __FILE__ <<
":"<< __LINE__ << endl;
/* Matrices are square: matrixOrder rows by matrixOrder columns. */
101 unsigned int matrixOrder = 2048;
/* Number of elements in one matrix. */
106 const size_t matrixSize = matrixOrder * matrixOrder;
/* Size in bytes of one matrix of floats. */
109 size_t bufferSize = matrixSize *
sizeof(float);
/* Create the three buffers; each status is folded into one flag so a single check covers all of them. */
112 bool createMemoryObjectsSuccess =
true;
113 memoryObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
114 createMemoryObjectsSuccess &=
checkSuccess(errorNumber);
115 memoryObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
116 createMemoryObjectsSuccess &=
checkSuccess(errorNumber);
117 memoryObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
118 createMemoryObjectsSuccess &=
checkSuccess(errorNumber);
119 if (!createMemoryObjectsSuccess)
121 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
122 cerr <<
"Failed to create OpenCL buffers. " << __FILE__ <<
":"<< __LINE__ << endl;
/*
 * Map all three buffers for writing with blocking calls (CL_TRUE), giving
 * host pointers matrixA, matrixB and matrixC.
 * NOTE(review): no visible statement updates mapMemoryObjectsSuccess from
 * errorNumber after these calls, so as shown the check below can never fire.
 * Confirm whether the checks exist in lines missing from this excerpt.
 */
127 bool mapMemoryObjectsSuccess =
true;
128 cl_float* matrixA = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[0], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
130 cl_float* matrixB = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[1], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
132 cl_float* matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[2], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
134 if (!mapMemoryObjectsSuccess)
136 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
137 cerr <<
"Mapping memory objects failed " << __FILE__ <<
":"<< __LINE__ << endl;
/*
 * Unmap the three host pointers again.
 * NOTE(review): presumably the matrices are filled through the mapped
 * pointers (e.g. by sgemmInitialize) before this point; that step is not
 * visible in this excerpt.
 */
145 bool unmapMemoryObjectsSuccess =
true;
146 unmapMemoryObjectsSuccess &=
checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[0], matrixA, 0, NULL, NULL));
147 unmapMemoryObjectsSuccess &=
checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[1], matrixB, 0, NULL, NULL));
148 unmapMemoryObjectsSuccess &=
checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[2], matrixC, 0, NULL, NULL));
149 if (!unmapMemoryObjectsSuccess)
151 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
152 cerr <<
"Unmapping memory objects failed " << __FILE__ <<
":"<< __LINE__ << endl;
/* Kernel arguments: 0-2 are the three buffers, 3 is matrixOrder, 4 is alpha, 5 is beta. */
157 bool setKernelArgumentsSuccess =
true;
158 setKernelArgumentsSuccess &=
checkSuccess(clSetKernelArg(kernel, 0,
sizeof(cl_mem), &memoryObjects[0]));
159 setKernelArgumentsSuccess &=
checkSuccess(clSetKernelArg(kernel, 1,
sizeof(cl_mem), &memoryObjects[1]));
160 setKernelArgumentsSuccess &=
checkSuccess(clSetKernelArg(kernel, 2,
sizeof(cl_mem), &memoryObjects[2]));
161 setKernelArgumentsSuccess &=
checkSuccess(clSetKernelArg(kernel, 3,
sizeof(cl_uint), &matrixOrder));
162 setKernelArgumentsSuccess &=
checkSuccess(clSetKernelArg(kernel, 4,
sizeof(cl_float), &alpha));
163 setKernelArgumentsSuccess &=
checkSuccess(clSetKernelArg(kernel, 5,
sizeof(cl_float), &beta));
/*
 * NOTE(review): this tests createMemoryObjectsSuccess, which was already
 * checked after buffer creation, instead of setKernelArgumentsSuccess.
 * As written, a failing clSetKernelArg call is not detected here. This looks
 * like a copy-paste slip - confirm and switch to setKernelArgumentsSuccess.
 */
164 if (!createMemoryObjectsSuccess)
166 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
167 cerr <<
"Failed setting OpenCL kernel arguments. " << __FILE__ <<
":"<< __LINE__ << endl;
/* Two-dimensional global work size of matrixOrder x matrixOrder. */
180 size_t globalWorksize[2] = {matrixOrder, matrixOrder};
/* Enqueue the kernel with no explicit local work size (NULL); completion is tracked through event. */
184 if (!
checkSuccess(clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalWorksize, NULL, 0, NULL, &event)))
186 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
187 cerr <<
"Failed enqueuing the kernel. " << __FILE__ <<
":"<< __LINE__ << endl;
/* Error path for waiting on kernel completion (the wait call and its check are not visible). */
194 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
195 cerr <<
"Failed waiting for kernel execution to finish. " << __FILE__ <<
":"<< __LINE__ << endl;
/* Error path for releasing the event (the release call and its check are not visible). */
204 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
205 cerr <<
"Failed releasing the event object. " << __FILE__ <<
":"<< __LINE__ << endl;
/* Map buffer 2 for reading with a blocking call so the host can access it through matrixC. */
210 matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjects[2], CL_TRUE, CL_MAP_READ, 0, bufferSize, 0, NULL, NULL, &errorNumber);
/* Error path for the read mapping (the check of errorNumber is not visible). */
213 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
214 cerr <<
"Mapping memory objects failed " << __FILE__ <<
":"<< __LINE__ << endl;
/* Unmap the result pointer again. */
221 if (!
checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[2], matrixC, 0, NULL, NULL)))
223 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
224 cerr <<
"Unmapping memory objects failed " << __FILE__ <<
":"<< __LINE__ << endl;
/* Final call to cleanUpOpenCL with all handles; it is not preceded by an error message. */
229 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);