Mali OpenCL SDK v1.1.0
 All Classes Files Functions Variables Macros Pages
sobel_no_vectors.cl
Go to the documentation of this file.
1 /*
2  * This confidential and proprietary software may be used only as
3  * authorised by a licensing agreement from ARM Limited
4  * (C) COPYRIGHT 2013 ARM Limited
5  * ALL RIGHTS RESERVED
6  * The entire notice above must be reproduced on all authorised
7  * copies and copies may only be made to the extent permitted
8  * by a licensing agreement from ARM Limited.
9  */
10 
/*
 * Scalar Sobel filter: every work-item reads one 3x3 window of the input
 * image and writes one horizontal (dX) and one vertical (dY) gradient sample.
 *
 * inputImage     Source pixels, one unsigned byte each, rows laid out
 *                back to back with a stride of 'width' elements.
 * width          Number of elements per image row.
 * outputImageDX  Destination for the horizontal gradient (signed bytes).
 * outputImageDY  Destination for the vertical gradient (signed bytes).
 *
 * The window's top-left corner sits at (get_global_id(0), get_global_id(1));
 * the result is stored at the window centre, i.e. one row down and one column
 * to the right of that corner.
 *
 * NOTE: the kernel performs no bounds checks. It reads up to two rows below
 * and two columns right of the work-item position, so the host is assumed to
 * size the NDRange and the buffers accordingly - confirm against the caller.
 */
__kernel void sobel_no_vectors(__global const uchar* restrict inputImage,
                               const int width,
                               __global char* restrict outputImageDX,
                               __global char* restrict outputImageDY)
{
    /* [Kernel size] */
    /*
     * One work-item per output pixel.
     * Dimension 0 of the NDRange selects the column, dimension 1 the row.
     */
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    /* Linear index of the window's top-left pixel. */
    const int topLeft = y * width + x;
    /* [Kernel size] */

    /* [Load row] */
    /* Top row of the window: all three pixels are needed. */
    uchar rawLeft = inputImage[topLeft];
    uchar rawMid = inputImage[topLeft + 1];
    uchar rawRight = inputImage[topLeft + 2];
    /* [Load row] */

    /* [Convert data] */
    /*
     * Widen from 8-bit unsigned to 16-bit signed before doing arithmetic:
     * the sums exceed 8 bits and the differences can be negative.
     */
    short left = convert_short(rawLeft);
    short mid = convert_short(rawMid);
    short right = convert_short(rawRight);
    /* [Convert data] */

    /* [Calculation] */
    /*
     * Top row contribution.
     * dX mask row is (-1, 0, +1), dY mask row is (+1, +2, +1).
     * Multiplications by 0 and 1 are simply left out.
     */
    short gradX = right - left;
    short gradY = left + (short)2 * mid + right;
    /* [Calculation] */

    /*
     * Middle row of the window, one 'width' further on in the linear data.
     * The centre pixel has a zero coefficient in both masks, so it is
     * never loaded.
     */
    const int midLeft = topLeft + width;
    rawLeft = inputImage[midLeft];
    rawRight = inputImage[midLeft + 2];

    left = convert_short(rawLeft);
    right = convert_short(rawRight);

    /*
     * Middle row contribution.
     * dX mask row is (-2, 0, +2); the dY mask row is all zeros, so dY is
     * left untouched.
     */
    gradX += (short)2 * (right - left);

    /* Bottom row of the window, two rows below the top-left pixel. */
    const int bottomLeft = topLeft + width * 2;
    rawLeft = inputImage[bottomLeft];
    rawMid = inputImage[bottomLeft + 1];
    rawRight = inputImage[bottomLeft + 2];

    left = convert_short(rawLeft);
    mid = convert_short(rawMid);
    right = convert_short(rawRight);

    /*
     * Bottom row contribution.
     * dX mask row is (-1, 0, +1), dY mask row is (-1, -2, -1).
     */
    gradX += right - left;
    gradY -= left + (short)2 * mid + right;

    /* [Store] */
    /*
     * Both gradients lie in [-1020, 1020]. Shifting right by 3 (an arithmetic
     * shift, equivalent to a floor division by 8) brings them into
     * [-128, 127] so they fit a signed 8-bit output.
     * This sample emits the two gradients separately and signed; an
     * application needing unsigned or combined output could do that extra
     * work here on the GPU instead.
     */
    const int centre = midLeft + 1;
    outputImageDX[centre] = convert_char(gradX >> 3);
    outputImageDY[centre] = convert_char(gradY >> 3);
    /* [Store] */
}