-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_processing_opencl.cpp
executable file
·320 lines (285 loc) · 11.4 KB
/
image_processing_opencl.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>
#ifdef ON_VM
#include <CL/cl2.hpp>
#else
#include <CL/cl.hpp>
#endif
extern "C"{
#include "ppm.h"
}
using namespace std;
using namespace cl;
// One pixel with a float per colour channel, stored in red, green, blue order.
struct AccuratePixel {
    float red;
    float green;
    float blue;
};
// An image made of AccuratePixel values.
// `x` and `y` are the two image dimensions (presumably width and height,
// mirroring PPMImage - confirm against ppm.h); the code in this file always
// treats `data` as an array of x * y pixels.
struct AccurateImage {
    int x;
    int y;
    AccuratePixel *data;
};
// Print `error_message` followed by a newline on stderr, then terminate the
// process with exit status 1. This function does not return.
void errorAndExit(string error_message) {
    std::cerr << error_message;
    std::cerr << std::endl;  // newline + flush before the process goes away
    exit(1);
}
// Build a floating-point version of a PPM image.
//
// A new AccurateImage and a pixel array of image->x * image->y elements are
// obtained with malloc, the dimensions are copied over and every colour
// channel of every pixel is converted to float.
// Nothing allocated here is freed by this function; the pointer is simply
// handed back to the caller. The malloc results are not checked.
AccurateImage *convertToAccurateImage(PPMImage *image) {
    const int pixelCount = image->x * image->y;

    AccurateImage *converted = static_cast<AccurateImage *>(malloc(sizeof(AccurateImage)));
    converted->x = image->x;
    converted->y = image->y;

    std::size_t byteCount = pixelCount * sizeof(AccuratePixel);
    converted->data = static_cast<AccuratePixel *>(malloc(byteCount));

    for (int idx = 0; idx < pixelCount; ++idx) {
        const auto &src = image->data[idx];
        AccuratePixel &dst = converted->data[idx];
        dst.red   = static_cast<float>(src.red);
        dst.green = static_cast<float>(src.green);
        dst.blue  = static_cast<float>(src.blue);
    }
    return converted;
}
// Create a new AccurateImage with the same dimensions as `image`.
//
//   image          source image; its x and y are always copied.
//   allocate_data  when true, a pixel array of x * y AccuratePixel elements is
//                  malloc'ed for the new image. When false, `data` is set to
//                  nullptr so the caller can attach its own storage.
//   copy_pixels    only consulted when allocate_data is true: if set, the
//                  source pixels are copied into the new array, otherwise the
//                  new array keeps whatever malloc returned (uninitialized).
//
// Returns the new image. Nothing allocated here is freed by this function.
// NOTE(review): malloc results are not checked, same as the rest of this file.
AccurateImage *copyAccurateImage(AccurateImage *image, bool allocate_data, bool copy_pixels) {
    AccurateImage *imageAccurate = static_cast<AccurateImage *>(std::malloc(sizeof(AccurateImage)));
    imageAccurate->x = image->x;
    imageAccurate->y = image->y;
    // Always give `data` a defined value: without this the pointer would be
    // indeterminate whenever allocate_data is false.
    imageAccurate->data = nullptr;

    if (allocate_data) {
        // Widen before multiplying so the byte count is computed in size_t
        // instead of overflowing an int product for large images.
        const std::size_t size = static_cast<std::size_t>(image->x)
                               * static_cast<std::size_t>(image->y)
                               * sizeof(AccuratePixel);
        imageAccurate->data = static_cast<AccuratePixel *>(std::malloc(size));
        if (copy_pixels) {
            std::memcpy(imageAccurate->data, image->data, size);
        }
    }
    return imageAccurate;
}
// Convert a floating-point image back into a newly malloc'ed PPMImage.
//
// Dimensions are copied and each float channel is assigned to the matching
// PPMPixel channel, relying on the implicit conversion to whatever channel
// type PPMPixel declares (no rounding or clamping is applied here).
// Nothing allocated here is freed by this function.
PPMImage * convertToPPPMImage(AccurateImage *imageIn) {
    const int pixelCount = imageIn->x * imageIn->y;

    PPMImage *converted = (PPMImage *)malloc(sizeof(PPMImage));
    converted->x = imageIn->x;
    converted->y = imageIn->y;
    converted->data = (PPMPixel*)malloc(pixelCount * sizeof(PPMPixel));

    for (int idx = 0; idx < pixelCount; ++idx) {
        const AccuratePixel &src = imageIn->data[idx];
        auto &dst = converted->data[idx];
        dst.red   = src.red;
        dst.green = src.green;
        dst.blue  = src.blue;
    }
    return converted;
}
// Map the difference of one colour channel (large-blur value minus small-blur
// value) to the number that is stored in the output pixel:
//   diff >  255       -> 255
//   diff <= -1        -> 257 + diff, capped at 255, rounded down
//   -1 < diff < 0     -> 0
//   otherwise         -> diff rounded down
// A difference of exactly -1 used to match none of the negative cases and
// ended up as floor(-1) = -1, which is outside the range of an unsigned
// channel (assuming PPMPixel channels are unsigned char - confirm in ppm.h).
// It is now handled by the wrap-around case, which yields 255.
static float differenceChannel(float largeValue, float smallValue) {
    const float diff = largeValue - smallValue;
    if (diff > 255) {
        return 255;
    }
    if (diff <= -1.0) {
        // computed in double and narrowed back to float, as before
        const float wrapped = 257.0 + diff;
        if (wrapped > 255) {
            return 255;
        }
        return std::floor(wrapped);
    }
    if (diff < 0.0) {
        return 0;
    }
    return std::floor(diff);
}

// Perform the final step, and return it as ppm.
//
// For every pixel index of imageInSmall, each colour channel of the result is
// derived from (imageInLarge - imageInSmall) via differenceChannel().
// The output takes its dimensions from imageInSmall; imageInLarge is indexed
// with the same indices and therefore must hold at least as many pixels.
// The returned image and its pixel array are malloc'ed and not freed here.
PPMImage * imageDifference(AccurateImage *imageInSmall, AccurateImage *imageInLarge) {
    const int pixelCount = imageInSmall->x * imageInSmall->y;

    PPMImage *imageOut = (PPMImage *)std::malloc(sizeof(PPMImage));
    imageOut->data = (PPMPixel*)std::malloc(pixelCount * sizeof(PPMPixel));
    imageOut->x = imageInSmall->x;
    imageOut->y = imageInSmall->y;

    for (int i = 0; i < pixelCount; i++) {
        const AccuratePixel &smallPx = imageInSmall->data[i];
        const AccuratePixel &largePx = imageInLarge->data[i];
        imageOut->data[i].red   = differenceChannel(largePx.red,   smallPx.red);
        imageOut->data[i].green = differenceChannel(largePx.green, smallPx.green);
        imageOut->data[i].blue  = differenceChannel(largePx.blue,  smallPx.blue);
    }
    return imageOut;
}
class OpenClBlur{
public:
OpenClBlur() {
// choose a platform containing this string if available
string preferred_platform = "Intel";
// select platform
std::vector<Platform> all_platforms;
Platform::get(&all_platforms);
if (all_platforms.empty()){
errorAndExit("No platforms found.\n");
}
Platform default_platform = all_platforms[0];
for(const auto& platform: all_platforms){
cerr << "Found platform: " << platform.getInfo<CL_PLATFORM_NAME>() << endl;
if(platform.getInfo<CL_PLATFORM_NAME>().find(preferred_platform)!=-1){
default_platform = platform;
}
}
cerr << "Using platform: " << default_platform.getInfo<CL_PLATFORM_NAME>() << "\n";
// select device
std::vector<Device> all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if (all_devices.empty()){
errorAndExit("No devices found.");
}
Device default_device = all_devices[0];
for(const auto& device: all_devices){
cerr << "Found device: " << device.getInfo<CL_DEVICE_NAME>() << endl;
}
cerr << "Using device: " << default_device.getInfo<CL_DEVICE_NAME>() << "\n";
device = default_device;
// create context
context = Context({device});
// create command queue with profiling enabled
queue = CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
// load opencl program and compile it
Program::Sources sources;
ifstream ifs("image_processing_opencl.cl");
if (ifs.fail()){
errorAndExit("Failed to open image_processing_opencl.");
}
string kernel_source { istreambuf_iterator<char>(ifs), istreambuf_iterator<char>() };
sources.push_back({kernel_source.c_str(), kernel_source.size()});
program = Program(context, sources);
if (program.build({device}) != CL_SUCCESS){
errorAndExit("Error building program!");
}
// create kernel
naive_kernel = Kernel(program, "naive_kernel");
}
AccurateImage * blur(AccurateImage *image, int size){
// perform blur operation
// allocate two buffers:
// use sizeof(AccuratePixel) instead of sizeof(float3) because sizeof(float3) = sizeof(float4)
// even though vload3 and vstore3 can operate on packed arrays
std::size_t bufferSize = image->x * image->y * sizeof(AccuratePixel);
Buffer buffer1(context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, bufferSize);
Buffer buffer2(context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, bufferSize);
// create a new Event so we know how long it took
events.emplace_back(make_pair("copy buffer to image", Event()));
// copy image to buffer
queue.enqueueWriteBuffer(buffer1, false, 0, bufferSize, image->data, nullptr, &events.back().second);
// 5 blur iterations
blurIteration(image, buffer1, buffer2, size);
blurIteration(image, buffer2, buffer1, size);
blurIteration(image, buffer1, buffer2, size);
blurIteration(image, buffer2, buffer1, size);
blurIteration(image, buffer1, buffer2, size);
// create new empty image
AccurateImage * result = copyAccurateImage(image, false, false);
// push back new event
events.emplace_back(make_pair("map buffer in memory", Event()));
// map buffer in memory - avoids having to copy again
result->data = (AccuratePixel *)queue.enqueueMapBuffer(buffer2,CL_FALSE,CL_MAP_READ, 0, bufferSize, nullptr, &events.back().second);
return result;
}
void finish(){
// finish execution and print events
queue.finish();
for(auto [s, e]: events){
printEvent(s, e);
}
events.clear();
}
private:
Device device;
Context context;
CommandQueue queue;
Program program;
Kernel naive_kernel;
std::vector<pair<string, Event>> events;
void printEvent(string s, Event& evt){
// ensure the event has completed
assert(evt.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>()==CL_COMPLETE);
cl_ulong queued = evt.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>();
cl_ulong submit = evt.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>();
cl_ulong start = evt.getProfilingInfo<CL_PROFILING_COMMAND_START>();
cl_ulong end = evt.getProfilingInfo<CL_PROFILING_COMMAND_END>();
cerr << "event: " << s << endl;
cerr << "queue time: " << submit-queued << "ns" << endl;
cerr << "run time: " << end-start << "ns" << endl;
cerr << "total time: " << end-queued << "ns" << endl;
}
void blurIteration(AccurateImage *image, Buffer& src, Buffer& dst, cl_int size){
// enqueue the OpenCL kernel naive_kernel
// create Event for profiling
events.emplace_back(make_pair("naive_kernel", Event()));
// set call arguments
naive_kernel.setArg(0, src);
naive_kernel.setArg(1, dst);
naive_kernel.setArg(2, image->x);
naive_kernel.setArg(3, image->y);
naive_kernel.setArg(4, size);
// call 2D kernel
queue.enqueueNDRangeKernel(
naive_kernel, // kernel to queue
NullRange, // use no offset
NDRange(image->y), // 2D kernel
NDRange(64), // use no local range
nullptr, // we use the queue in sequential mode so we don't have to specify Events that need to finish before
&events.back().second // Event to use for profiling
);
}
};
int main(int argc, char** argv){
// read image
PPMImage *image;
OpenClBlur blur;
// select where to read the image from
if(argc > 1) {
// from file for debugging (with argument)
image = readPPM("flower.ppm");
} else {
// from stdin for cmb
image = readStreamPPM(stdin);
}
AccurateImage *imageAccurate = convertToAccurateImage(image);
// apply blur
AccurateImage * imageAccurate2_tiny = blur.blur(imageAccurate, 2);
AccurateImage * imageAccurate2_small = blur.blur(imageAccurate, 3);
AccurateImage * imageAccurate2_medium = blur.blur(imageAccurate, 5);
AccurateImage * imageAccurate2_large = blur.blur(imageAccurate, 8);
// an intermediate step can be saved for debugging like this
// writePPM("imageAccurate2_tiny.ppm", convertToPPPMImage(imageAccurate2_tiny));
// finish OpenCl execution and print events
// we do this only once at the end in order to keep the OpenCL queue filled
blur.finish();
// calculate difference
PPMImage *final_tiny = imageDifference(imageAccurate2_tiny, imageAccurate2_small);
PPMImage *final_small = imageDifference(imageAccurate2_small, imageAccurate2_medium);
PPMImage *final_medium = imageDifference(imageAccurate2_medium, imageAccurate2_large);
// Save the images.
if(argc > 1) {
writePPM("flower_tiny.ppm", final_tiny);
writePPM("flower_small.ppm", final_small);
writePPM("flower_medium.ppm", final_medium);
} else {
writeStreamPPM(stdout, final_tiny);
writeStreamPPM(stdout, final_small);
writeStreamPPM(stdout, final_medium);
}
}