Mali OpenCL SDK v1.1.0
 All Classes Files Functions Variables Macros Pages
hello_world_vector.cpp
Go to the documentation of this file.
1 /*
2  * This confidential and proprietary software may be used only as
3  * authorised by a licensing agreement from ARM Limited
4  * (C) COPYRIGHT 2013 ARM Limited
5  * ALL RIGHTS RESERVED
6  * The entire notice above must be reproduced on all authorised
7  * copies and copies may only be made to the extent permitted
8  * by a licensing agreement from ARM Limited.
9  */
10 
11 #include "common.h"
12 #include "image.h"
13 
14 #include <CL/cl.h>
15 #include <iostream>
16 
17 using namespace std;
18 
26 int main(void)
27 {
28  cl_context context = 0;
29  cl_command_queue commandQueue = 0;
30  cl_program program = 0;
31  cl_device_id device = 0;
32  cl_kernel kernel = 0;
33  int numberOfMemoryObjects = 3;
34  cl_mem memoryObjects[3] = {0, 0, 0};
35  cl_int errorNumber;
36 
37  if (!createContext(&context))
38  {
39  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
40  cerr << "Failed to create an OpenCL context. " << __FILE__ << ":"<< __LINE__ << endl;
41  return 1;
42  }
43 
44  if (!createCommandQueue(context, &commandQueue, &device))
45  {
46  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
47  cerr << "Failed to create the OpenCL command queue. " << __FILE__ << ":"<< __LINE__ << endl;
48  return 1;
49  }
50 
51  if (!createProgram(context, device, "assets/hello_world_vector.cl", &program))
52  {
53  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
54  cerr << "Failed to create OpenCL program." << __FILE__ << ":"<< __LINE__ << endl;
55  return 1;
56  }
57 
58  kernel = clCreateKernel(program, "hello_world_vector", &errorNumber);
59  if (!checkSuccess(errorNumber))
60  {
61  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
62  cerr << "Failed to create OpenCL kernel. " << __FILE__ << ":"<< __LINE__ << endl;
63  return 1;
64  }
65 
66  /* [Query preferred vector width] */
67  /*
68  * Query the device to find out it's prefered integer vector width.
69  * Although we are only printing the value here, it can be used to select between
70  * different versions of a kernel.
71  */
72  cl_uint integerVectorWidth;
73  clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &integerVectorWidth, NULL);
74  cout << "Prefered vector width for integers: " << integerVectorWidth << endl;
75  /* [Query preferred vector width] */
76 
77  /* Number of elements in the arrays of input and output data. */
78  cl_int arraySize = 1000000;
79 
80  /* The buffers are the size of the arrays. */
81  size_t bufferSize = arraySize * sizeof(cl_int);
82 
83  /*
84  * Ask the OpenCL implementation to allocate buffers for the data.
85  * We ask the OpenCL implemenation to allocate memory rather than allocating
86  * it on the CPU to avoid having to copy the data later.
87  * The read/write flags relate to accesses to the memory from within the kernel.
88  */
89  bool createMemoryObjectsSuccess = true;
90  /* [Create buffer] */
91  memoryObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
92  createMemoryObjectsSuccess &= checkSuccess(errorNumber);
93 
94  memoryObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
95  createMemoryObjectsSuccess &= checkSuccess(errorNumber);
96 
97  memoryObjects[2] = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSize, NULL, &errorNumber);
98  createMemoryObjectsSuccess &= checkSuccess(errorNumber);
99  /* [Create buffer] */
100 
101  if (!createMemoryObjectsSuccess)
102  {
103  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
104  cerr << "Failed to create OpenCL buffer. " << __FILE__ << ":"<< __LINE__ << endl;
105  return 1;
106  }
107 
108  /* Map the memory buffers created by the OpenCL implementation to pointers so we can access them on the CPU. */
109  bool mapMemoryObjectsSuccess = true;
110 
111  /* [Map buffer] */
112  cl_int* inputA = (cl_int*)clEnqueueMapBuffer(commandQueue, memoryObjects[0], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
113  mapMemoryObjectsSuccess &= checkSuccess(errorNumber);
114 
115  cl_int* inputB = (cl_int*)clEnqueueMapBuffer(commandQueue, memoryObjects[1], CL_TRUE, CL_MAP_WRITE, 0, bufferSize, 0, NULL, NULL, &errorNumber);
116  mapMemoryObjectsSuccess &= checkSuccess(errorNumber);
117  /* [Map buffer] */
118 
119  if (!mapMemoryObjectsSuccess)
120  {
121  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
122  cerr << "Failed to map buffer. " << __FILE__ << ":"<< __LINE__ << endl;
123  return 1;
124  }
125 
126  /* [Initialize the input data] */
127  for (int i = 0; i < arraySize; i++)
128  {
129  inputA[i] = i;
130  inputB[i] = i;
131  }
132  /* [Initialize the input data] */
133 
134  /*
135  * Unmap the memory objects as we are finished using them from the CPU side.
136  * We unmap the memory because otherwise:
137  * - reads and writes to that memory from inside a kernel on the OpenCL side are undefined,
138  * - the OpenCL implementation cannot free the memory when it is finished.
139  */
140  /* [Un-map buffer] */
141  if (!checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[0], inputA, 0, NULL, NULL)))
142  {
143  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
144  cerr << "Unmapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
145  return 1;
146  }
147 
148  if (!checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[1], inputB, 0, NULL, NULL)))
149  {
150  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
151  cerr << "Unmapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
152  return 1;
153  }
154  /* [Un-map buffer] */
155 
156  /* Set the kernel arguments. */
157  bool setKernelArgumentsSuccess = true;
158  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 0, sizeof(cl_mem), &memoryObjects[0]));
159  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoryObjects[1]));
160  setKernelArgumentsSuccess &= checkSuccess(clSetKernelArg(kernel, 2, sizeof(cl_mem), &memoryObjects[2]));
161 
162  if (!setKernelArgumentsSuccess)
163  {
164  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
165  cerr << "Failed setting OpenCL kernel arguments. " << __FILE__ << ":"<< __LINE__ << endl;
166  return 1;
167  }
168 
169  /* An event to associate with the Kernel. Allows us to retreive profiling information later. */
170  cl_event event = 0;
171 
172  /* [Global work size] */
173  /*
174  * Each instance of our OpenCL kernel now operates on 4 elements of each array so the number of
175  * instances needed is the number of elements in the array divided by 4.
176  */
177  size_t globalWorksize[1] = {arraySize / 4};
178  /* Enqueue the kernel */
179  if (!checkSuccess(clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalWorksize, NULL, 0, NULL, &event)))
180  {
181  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
182  cerr << "Failed enqueuing the kernel. " << __FILE__ << ":"<< __LINE__ << endl;
183  return 1;
184  }
185  /* [Global work size] */
186 
187  /* Wait for kernel execution completion. */
188  if (!checkSuccess(clFinish(commandQueue)))
189  {
190  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
191  cerr << "Failed waiting for kernel execution to finish. " << __FILE__ << ":"<< __LINE__ << endl;
192  return 1;
193  }
194 
195  /* Print the profiling information for the event. */
196  printProfilingInfo(event);
197  /* Release the event object. */
198  if (!checkSuccess(clReleaseEvent(event)))
199  {
200  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
201  cerr << "Failed releasing the event object. " << __FILE__ << ":"<< __LINE__ << endl;
202  return 1;
203  }
204 
205  /* Get a pointer to the output data. */
206  cl_int* output = (cl_int*)clEnqueueMapBuffer(commandQueue, memoryObjects[2], CL_TRUE, CL_MAP_READ, 0, bufferSize, 0, NULL, NULL, &errorNumber);
207  if (!checkSuccess(errorNumber))
208  {
209  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
210  cerr << "Failed to map buffer. " << __FILE__ << ":"<< __LINE__ << endl;
211  return 1;
212  }
213 
214  /* Uncomment the following block to print results. */
215  /*
216  for (int i = 0; i < arraySize; i++)
217  {
218  cout << "i = " << i << ", output = " << output[i] << "\n";
219  }
220  */
221 
222  /* Unmap the memory object as we are finished using them from the CPU side. */
223  if (!checkSuccess(clEnqueueUnmapMemObject(commandQueue, memoryObjects[2], output, 0, NULL, NULL)))
224  {
225  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
226  cerr << "Unmapping memory objects failed " << __FILE__ << ":"<< __LINE__ << endl;
227  return 1;
228  }
229 
230  /* Release OpenCL objects. */
231  cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjects, numberOfMemoryObjects);
232 }