From a4d0a5dca8c1246c6f7858b6e6a8fa5b607b6033 Mon Sep 17 00:00:00 2001 From: karl Date: Wed, 16 Dec 2020 12:17:58 +0100 Subject: [PATCH] OpenCL mode is working! --- gol.cl | 45 +++++++++++++++++++-------------------------- main.cpp | 36 +++++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 37 deletions(-) diff --git a/gol.cl b/gol.cl index 13e0dc1..d1f9358 100644 --- a/gol.cl +++ b/gol.cl @@ -1,37 +1,30 @@ -void kernel generation(global const int *previous, global int *new, global const int *size) { +void kernel generation(global const bool *previous, global bool *new, global const int *size) { int ID, Nthreads, n, ratio, start, stop, x, y, left, right, up, down, neighbors, i; ID = get_global_id(0); - Nthreads = get_global_size(0); - n = size[0] * size[1]; - ratio = (n / Nthreads); // number of elements for each thread - start = ratio * ID; - stop = ratio * (ID + 1); - - for (i = start; i < stop; i++) - x = i % size[1]; - y = i / size[0]; + x = ID % size[0]; + y = ID / size[0]; - left = x - 1; - right = (x + 1) % size[0]; + left = (x - 1 + size[0]) % size[0]; + right = (x + 1) % size[0]; - up = (y - 1 + size[1]) % size[1]; - down = (y + 1) % size[1]; + up = (y - 1 + size[1]) % size[1]; + down = (y + 1) % size[1]; - // Get the number of neighbors - neighbors = - previous[size[0] * up + left] - + previous[size[0] * up + x] - + previous[size[0] * up + right] + // Get the number of neighbors + neighbors = + previous[size[0] * up + left] + + previous[size[0] * up + x] + + previous[size[0] * up + right] - + previous[size[0] * y + left] - + previous[size[0] * y + right] + + previous[size[0] * y + left] + + previous[size[0] * y + right] - + previous[size[0] * down + left] - + previous[size[0] * down + x] - + previous[size[0] * down + right]; + + previous[size[0] * down + left] + + previous[size[0] * down + x] + + previous[size[0] * down + right]; - // Update cell - new[size[0] * y + x] = (neighbors == 3) + previous[size[0] * y + x] * (neighbors == 2); + // Update cell + new[size[0] * y + x] = (neighbors == 3) + previous[size[0] * y + x] * (neighbors == 2); } \ No newline at end of file diff --git a/main.cpp b/main.cpp index b2e3d49..00c01cf 100644 --- a/main.cpp +++ b/main.cpp @@ -212,6 +212,8 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { std::ostringstream ss; ss << file.rdbuf(); kernel_code = ss.str(); + } else { + std::cout << "Error: Couldn't read Kernel source!" << std::endl; } sources.push_back({kernel_code.c_str(), kernel_code.length()}); @@ -236,6 +238,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { int size_y = std::stoi(y_str); bool *world = new bool[size_x * size_y]; + bool *result = new bool[size_x * size_y]; // Set the data for (int y = 0; y < size_y; y++) { @@ -266,19 +269,31 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { // push write commands to queue queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world); - queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, world); // TODO: pass empty array - queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size); // TODO: pass empty array + queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result); + queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size); // RUN ZE KERNEL - cl::Kernel gol_kernel(program, "gol"); - gol_kernel.setArg(0, buffer_previous); - gol_kernel.setArg(1, buffer_new); - gol_kernel.setArg(2, buffer_size); - queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(10), cl::NullRange); + cl::Kernel gol_kernel(program, "generation"); + + for (int i = 0; i < num_generations; i++) { + gol_kernel.setArg(0, buffer_previous); + gol_kernel.setArg(1, buffer_new); + gol_kernel.setArg(2, buffer_size); + + queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange); + queue.finish(); + + std::swap(buffer_previous, buffer_new); + } queue.finish(); - // read result from GPU to here - queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, world); // TODO: pass different empty? + // Since we swap after every generation, we need to proceed differently depending on + // whether we're in swapped mode or not at the moment + if (num_generations % 2 == 0) { + queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, result); + } else { + queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result); + } // Write the result std::ofstream result_file; @@ -288,11 +303,10 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { for (int y = 0; y < size_y; y++) { std::string line; - getline(world_file, line); for (int x = 0; x < size_x; x++) { // Convert 1 and 0 to 'x' and '.' again - line += world[y * size_x + x] ? 'x' : '.'; + line += result[y * size_x + x] ? 'x' : '.'; } result_file << line << '\n';