diff --git a/main.cpp b/main.cpp index d0770c0..1225718 100644 --- a/main.cpp +++ b/main.cpp @@ -1,31 +1,27 @@ -#include #include #include +#include #include #ifdef __APPLE__ - #include +#include #else - #include +#include #endif #include "Timing.h" -#define LIVE_CELL 1 // 'x' in the input data -#define DEAD_CELL 0 // '.' in the input data +#define LIVE_CELL 1 // 'x' in the input data +#define DEAD_CELL 0 // '.' in the input data -enum Mode { - SEQ, - OMP, - OCL -}; +enum Mode { SEQ, OMP, OCL }; // Using this struct seems to be more performant than just passing // a bool** around functions. However, also adding the neighbor_count // made performance worse. struct World { World(int size_x, int size_y) : size_x(size_x), size_y(size_y) { - data = new bool*[size_y]; + data = new bool *[size_y]; for (int y = 0; y < size_y; y++) { data[y] = new bool[size_x]; @@ -36,7 +32,7 @@ struct World { for (int y = 0; y < size_y; y++) { delete data[y]; } - + delete data; } @@ -44,33 +40,21 @@ struct World { // All following functions are just convenience shorthands. // They are inlined so it doesn't make a difference in performance. - - inline bool get_value(int x, int y) { - return data[y][x]; - } - inline void set_alive(int x, int y) { - data[y][x] = LIVE_CELL; - } + inline bool get_value(int x, int y) { return data[y][x]; } - inline void set_dead(int x, int y) { - data[y][x] = DEAD_CELL; - } + inline void set_alive(int x, int y) { data[y][x] = LIVE_CELL; } - inline void set(int x, int y, bool val) { - data[y][x] = val; - } + inline void set_dead(int x, int y) { data[y][x] = DEAD_CELL; } - inline int get_num_neighbors(int left, int right, int up, int down, int x, int y) { - return - get_value(left, down) + - get_value(x, down) + - get_value(right, down) + - get_value(left, y) + - get_value(right, y) + - get_value(left, up) + - get_value(x, up) + - get_value(right, up); + inline void set(int x, int y, bool val) { data[y][x] = val; } + + inline int get_num_neighbors(int left, int right, int up, int down, int x, + int y) { + return get_value(left, down) + get_value(x, down) + + get_value(right, down) + get_value(left, y) + + get_value(right, y) + get_value(left, up) + get_value(x, up) + + get_value(right, up); } int size_x; @@ -84,13 +68,15 @@ void generation_omp(World &world, int *neighbor_counts) { // Set the neighbor count array according to the world. - // We handle x == 0 and x == size_x - 1 separately in order to avoid all the constant if checks. + // We handle x == 0 and x == size_x - 1 separately in order to avoid all the + // constant if checks. int loop_x = size_x - 1; - #pragma omp parallel for +#pragma omp parallel for for (int y = 0; y < size_y; y++) { // Wrap y - // This happens rarely enough that this if isn't a huge problem, and it would be tedious + // This happens rarely enough that this if isn't a huge problem, and it + // would be tedious // to handle both this and x manually. int up = y - 1; int down = y + 1; @@ -101,19 +87,22 @@ void generation_omp(World &world, int *neighbor_counts) { down -= size_y; // Handle x == 0 - neighbor_counts[y * size_x + 0] = world.get_num_neighbors(loop_x, 1, up, down, 0, y); - + neighbor_counts[y * size_x + 0] = + world.get_num_neighbors(loop_x, 1, up, down, 0, y); + // Handle 'normal' x for (int x = 1; x < loop_x; x++) { - neighbor_counts[y * size_x + x] = world.get_num_neighbors(x - 1, x + 1, up, down, x, y); + neighbor_counts[y * size_x + x] = + world.get_num_neighbors(x - 1, x + 1, up, down, x, y); } // Handle x == loop_x (== size_x - 1, we're just re-using the variable - neighbor_counts[y * size_x + loop_x] = world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y); + neighbor_counts[y * size_x + loop_x] = + world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y); } - // Update cells accordingly - #pragma omp parallel for +// Update cells accordingly +#pragma omp parallel for for (int y = 0; y < world.size_y; y++) { for (int x = 0; x < world.size_x; x++) { char this_cell = world.get_value(x, y); @@ -131,12 +120,14 @@ void generation_seq(World &world, int *neighbor_counts) { // Set the neighbor count array according to the world. - // We handle x == 0 and x == size_x - 1 separately in order to avoid all the constant if checks. + // We handle x == 0 and x == size_x - 1 separately in order to avoid all the + // constant if checks. int loop_x = size_x - 1; for (int y = 0; y < size_y; y++) { // Wrap y - // This happens rarely enough that this if isn't a huge problem, and it would be tedious + // This happens rarely enough that this if isn't a huge problem, and it + // would be tedious // to handle both this and x manually. int up = y - 1; int down = y + 1; @@ -147,15 +138,18 @@ void generation_seq(World &world, int *neighbor_counts) { down -= size_y; // Handle x == 0 - neighbor_counts[y * size_x + 0] = world.get_num_neighbors(loop_x, 1, up, down, 0, y); - + neighbor_counts[y * size_x + 0] = + world.get_num_neighbors(loop_x, 1, up, down, 0, y); + // Handle 'normal' x for (int x = 1; x < loop_x; x++) { - neighbor_counts[y * size_x + x] = world.get_num_neighbors(x - 1, x + 1, up, down, x, y); + neighbor_counts[y * size_x + x] = + world.get_num_neighbors(x - 1, x + 1, up, down, x, y); } // Handle x == loop_x (== size_x - 1, we're just re-using the variable - neighbor_counts[y * size_x + loop_x] = world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y); + neighbor_counts[y * size_x + loop_x] = + world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y); } // Update cells accordingly @@ -170,30 +164,34 @@ void generation_seq(World &world, int *neighbor_counts) { } void print_usage() { - std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl; + std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device " + "cpu|gpu] --load infile.gol --save outfile.gol --generations " + "number [--measure]" + << std::endl; } -void main_opencl(std::string infile, std::string outfile, int num_generations, bool measure, bool use_gpu) { +void main_opencl(std::string infile, std::string outfile, int num_generations, + bool measure, bool use_gpu) { Timing *timing = Timing::getInstance(); // Get Nvidia CUDA platform std::vector all_platforms; cl::Platform::get(&all_platforms); - if (all_platforms.size()==0) { - std::cout<<" No platforms found. Check OpenCL installation!\n"; + if (all_platforms.size() == 0) { + std::cout << " No platforms found. Check OpenCL installation!\n"; exit(1); } - cl::Platform default_platform=all_platforms[0]; + cl::Platform default_platform = all_platforms[0]; // Use the first device (in my case, GPU is on this platform) std::vector all_devices; default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); - if(all_devices.size()==0){ - std::cout<<" No devices found. Check OpenCL installation!\n"; + if (all_devices.size() == 0) { + std::cout << " No devices found. Check OpenCL installation!\n"; exit(1); } - cl::Device default_device=all_devices[0]; + cl::Device default_device = all_devices[0]; // The context links device and platform cl::Context context({default_device}); @@ -201,9 +199,9 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b // Load kernel code from file into Sources cl::Program::Sources sources; - std::ifstream file("gol.cl"); //taking file as inputstream + std::ifstream file("gol.cl"); // taking file as inputstream std::string kernel_code; - + if (file) { std::ostringstream ss; ss << file.rdbuf(); @@ -216,7 +214,9 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b // Create a program with the previously defined context and (kernel) sources cl::Program program(context, sources); if (program.build({default_device}) != CL_SUCCESS) { - std::cout << "Error building: " << program.getBuildInfo(default_device) << std::endl; + std::cout << "Error building: " + << program.getBuildInfo(default_device) + << std::endl; exit(1); } @@ -251,7 +251,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b world[y * size_x + x] = 1 ? line[x] == 'x' : 0; } } - + world_file.close(); // Put the size into an array so it can be passed to the kernel @@ -267,11 +267,13 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b cl::CommandQueue queue(context, default_device); // Push write commands to queue - queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world); + queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, + world); queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result); queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size); - // Create the kernel, which uses the `generation` method in our program (which was created from the kernel code) + // Create the kernel, which uses the `generation` method in our program + // (which was created from the kernel code) cl::Kernel gol_kernel(program, "generation"); timing->stopSetup(); @@ -285,11 +287,14 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b gol_kernel.setArg(2, buffer_size); // Run it - queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange); + queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), + cl::NullRange); queue.finish(); - // Swap the previous buffer with the new buffer, as we will want to use our result from this loop - // as the input of the next loop (overwriting the previous result, which is not needed anymore) + // Swap the previous buffer with the new buffer, as we will want to use + // our result from this loop + // as the input of the next loop (overwriting the previous result, + // which is not needed anymore) std::swap(buffer_previous, buffer_new); } queue.finish(); @@ -297,12 +302,15 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b timing->stopComputation(); timing->startFinalization(); - // Since we swap after every generation, we need to proceed differently depending on + // Since we swap after every generation, we need to proceed differently + // depending on // whether we're in swapped mode or not at the moment if (num_generations % 2 == 0) { - queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, result); + queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, + result); } else { - queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result); + queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, + result); } // Write the result @@ -310,7 +318,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b result_file.open(outfile); result_file << size_x << "," << size_y << '\n'; - + for (int y = 0; y < size_y; y++) { std::string line; @@ -330,7 +338,8 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b timing->stopFinalization(); } -void main_classic(std::string infile, std::string outfile, int num_generations, bool measure, Mode mode) { +void main_classic(std::string infile, std::string outfile, int num_generations, + bool measure, Mode mode) { Timing *timing = Timing::getInstance(); // Read in the start state @@ -359,7 +368,7 @@ void main_classic(std::string infile, std::string outfile, int num_generations, world.set(x, y, 1 ? line[x] == 'x' : 0); } } - + world_file.close(); // In this separate array, we keep track of how many live neighbors @@ -390,7 +399,7 @@ void main_classic(std::string infile, std::string outfile, int num_generations, result_file.open(outfile); result_file << size_x << "," << size_y << '\n'; - + for (int y = 0; y < size_y; y++) { std::string line; getline(world_file, line); @@ -409,7 +418,7 @@ void main_classic(std::string infile, std::string outfile, int num_generations, timing->stopFinalization(); } -int main(int argc, char* argv[]) { +int main(int argc, char *argv[]) { Timing *timing = Timing::getInstance(); // Setup. @@ -432,25 +441,25 @@ int main(int argc, char* argv[]) { for (int i = 1; i < argc; i++) { if (std::string(argv[i]) == "--load") { if (i + 1 < argc) { - infile = argv[i+1]; + infile = argv[i + 1]; } else { print_usage(); return 1; - } + } } else if (std::string(argv[i]) == "--save") { if (i + 1 < argc) { - outfile = argv[i+1]; + outfile = argv[i + 1]; } else { print_usage(); return 1; - } + } } else if (std::string(argv[i]) == "--mode") { if (i + 1 < argc) { - if (std::string(argv[i+1]) == "seq") { + if (std::string(argv[i + 1]) == "seq") { mode = Mode::SEQ; - } else if (std::string(argv[i+1]) == "omp") { + } else if (std::string(argv[i + 1]) == "omp") { mode = Mode::OMP; - } else if (std::string(argv[i+1]) == "ocl") { + } else if (std::string(argv[i + 1]) == "ocl") { mode = Mode::OCL; } else { print_usage(); @@ -462,17 +471,18 @@ int main(int argc, char* argv[]) { } } else if (std::string(argv[i]) == "--threads") { if (i + 1 < argc) { - omp_set_num_threads(std::stoi(argv[i+1])); + omp_set_num_threads(std::stoi(argv[i + 1])); } else { print_usage(); return 1; } - // TODO: This parameter isn't really needed anymore as we only use the GPU now + // TODO: This parameter isn't really needed anymore as we only use + // the GPU now } else if (std::string(argv[i]) == "--device") { if (i + 1 < argc) { - if (std::string(argv[i+1]) == "cpu") { + if (std::string(argv[i + 1]) == "cpu") { use_gpu = false; - } else if (std::string(argv[i+1]) == "gpu") { + } else if (std::string(argv[i + 1]) == "gpu") { use_gpu = true; } else { print_usage(); @@ -484,11 +494,11 @@ int main(int argc, char* argv[]) { } } else if (std::string(argv[i]) == "--generations") { if (i + 1 < argc) { - num_generations = std::stoi(argv[i+1]); + num_generations = std::stoi(argv[i + 1]); } else { print_usage(); return 1; - } + } } else if (std::string(argv[i]) == "--measure") { measure = true; } @@ -501,9 +511,7 @@ int main(int argc, char* argv[]) { main_classic(infile, outfile, num_generations, measure, mode); } - if (measure) { - std::cout << timing->getResults() << std::endl; - } + if (measure) { std::cout << timing->getResults() << std::endl; } return 0; }