From 7bcc724d8ec16640854ece50dc690bd0b91f2638 Mon Sep 17 00:00:00 2001 From: karl Date: Wed, 16 Dec 2020 19:17:07 +0100 Subject: [PATCH] Improve code readability updated comments and separate function also for OMP/seq --- main.cpp | 249 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 130 insertions(+), 119 deletions(-) diff --git a/main.cpp b/main.cpp index 00c01cf..286694c 100644 --- a/main.cpp +++ b/main.cpp @@ -173,8 +173,10 @@ void print_usage() { std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl; } -void main_opencl(std::string infile, std::string outfile, int num_generations) { - // get all platforms (drivers), e.g. NVIDIA +void main_opencl(std::string infile, std::string outfile, int num_generations, bool measure) { + Timing *timing = Timing::getInstance(); + + // Get Nvidia CUDA platform std::vector all_platforms; cl::Platform::get(&all_platforms); @@ -183,28 +185,22 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { exit(1); } cl::Platform default_platform=all_platforms[0]; - std::cout << "Using platform: "<()<<"\n"; - // get default device (CPUs, GPUs) of the default platform + // Use the first device (in my case, GPU is on this platform) std::vector all_devices; default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); if(all_devices.size()==0){ std::cout<<" No devices found. Check OpenCL installation!\n"; exit(1); } - - // use device[1] because that's a GPU; device[0] is the CPU cl::Device default_device=all_devices[0]; - std::cout<< "Using device: "<()<<"\n"; - // a context is like a "runtime link" to the device and platform; - // i.e. communication is possible + // The context links device and platform cl::Context context({default_device}); - // create the program that we want to execute on the device + // Load kernel code from file into Sources cl::Program::Sources sources; - // load kernel from file std::ifstream file("gol.cl"); //taking file as inputstream std::string kernel_code; @@ -217,6 +213,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { } sources.push_back({kernel_code.c_str(), kernel_code.length()}); + // Create a program with the previously defined context and (kernel) sources cl::Program program(context, sources); if (program.build({default_device}) != CL_SUCCESS) { std::cout << "Error building: " << program.getBuildInfo(default_device) << std::endl; @@ -237,6 +234,8 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { int size_x = std::stoi(x_str); int size_y = std::stoi(y_str); + // Two arrays because one will always hold the previous status + // For now, we only put data into `world` bool *world = new bool[size_x * size_y]; bool *result = new bool[size_x * size_y]; @@ -255,38 +254,49 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { world_file.close(); - // Put size into array + // Put the size into an array so it can be passed to the kernel int size[2] = {size_x, size_y}; int n = size_x * size_y; - // create buffers on device (allocate space on GPU) + // Allocate space on the GPU cl::Buffer buffer_previous(context, CL_MEM_READ_WRITE, sizeof(bool) * n); cl::Buffer buffer_new(context, CL_MEM_READ_WRITE, sizeof(bool) * n); cl::Buffer buffer_size(context, CL_MEM_READ_WRITE, sizeof(int) * 2); - // create a queue (a queue of commands that the GPU will execute) + // Create queue of commands that the GPU will execute cl::CommandQueue queue(context, default_device); - // push write commands to queue + // Push write commands to queue queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world); queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result); queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size); - // RUN ZE KERNEL + // Create the kernel, which uses the `generation` method in our program (which was created from the kernel code) cl::Kernel gol_kernel(program, "generation"); + timing->stopSetup(); + timing->startComputation(); + + // Actually do the generations for (int i = 0; i < num_generations; i++) { + // Update the arguments in the kernel gol_kernel.setArg(0, buffer_previous); gol_kernel.setArg(1, buffer_new); gol_kernel.setArg(2, buffer_size); + // Run it queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange); queue.finish(); + // Swap the previous buffer with the new buffer, as we will want to use our result from this loop + // as the input of the next loop (overwriting the previous result, which is not needed anymore) std::swap(buffer_previous, buffer_new); } queue.finish(); + timing->stopComputation(); + timing->startFinalization(); + // Since we swap after every generation, we need to proceed differently depending on // whether we're in swapped mode or not at the moment if (num_generations % 2 == 0) { @@ -313,113 +323,17 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) { } result_file.close(); + + timing->stopFinalization(); + + if (measure) { + std::cout << timing->getResults() << std::endl; + } } -int main(int argc, char* argv[]) { +void main_classic(std::string infile, std::string outfile, int num_generations, bool measure, Mode mode) { Timing *timing = Timing::getInstance(); - // Setup. - timing->startSetup(); - - // Parse command line arguments - std::string infile; - std::string outfile; - Mode mode = Mode::SEQ; - bool use_gpu = false; - int num_generations = 0; - int num_threads = 1; - bool measure = false; - - if (argc < 8) { - print_usage(); - return 1; - } - - // Parse arguments - for (int i = 1; i < argc; i++) { - if (std::string(argv[i]) == "--load") { - if (i + 1 < argc) { - infile = argv[i+1]; - } else { - print_usage(); - return 1; - } - } else if (std::string(argv[i]) == "--save") { - if (i + 1 < argc) { - outfile = argv[i+1]; - } else { - print_usage(); - return 1; - } - } else if (std::string(argv[i]) == "--mode") { - if (i + 1 < argc) { - if (std::string(argv[i+1]) == "seq") { - mode = Mode::SEQ; - } else if (std::string(argv[i+1]) == "omp") { - mode = Mode::OMP; - } else if (std::string(argv[i+1]) == "ocl") { - mode = Mode::OCL; - } else { - print_usage(); - return 1; - } - } else { - print_usage(); - return 1; - } - } else if (std::string(argv[i]) == "--threads") { - if (i + 1 < argc) { - num_threads = std::stoi(argv[i+1]); - } else { - print_usage(); - return 1; - } - } else if (std::string(argv[i]) == "--device") { - if (i + 1 < argc) { - if (std::string(argv[i+1]) == "cpu") { - use_gpu = false; - } else if (std::string(argv[i+1]) == "gpu") { - use_gpu = true; - } else { - print_usage(); - return 1; - } - } else { - print_usage(); - return 1; - } - } else if (std::string(argv[i]) == "--generations") { - if (i + 1 < argc) { - num_generations = std::stoi(argv[i+1]); - } else { - print_usage(); - return 1; - } - } else if (std::string(argv[i]) == "--measure") { - measure = true; - } - } - - // TODO: Just for testing - if (use_gpu) { - std::cout << "Using GPU" << std::endl; - } else { - std::cout << "Using CPU" << std::endl; - } - - if (mode == Mode::SEQ) { - std::cout << "Running classic sequential version" << std::endl; - } else if (mode == Mode::OMP) { - std::cout << "Running OpenMP version with " << num_threads << " threads" << std::endl; - } else if (mode == Mode::OCL) { - std::cout << "Running OpenCL version" << std::endl; - } - - if (mode == Mode::OCL) { - main_opencl(infile, outfile, num_generations); - return 0; - } - // Read in the start state std::ifstream world_file; world_file.open(infile); @@ -498,6 +412,103 @@ int main(int argc, char* argv[]) { if (measure) { std::cout << timing->getResults() << std::endl; } +} + +int main(int argc, char* argv[]) { + Timing *timing = Timing::getInstance(); + + // Setup. + timing->startSetup(); + + // Parse command line arguments + std::string infile; + std::string outfile; + Mode mode = Mode::SEQ; + bool use_gpu = false; + int num_generations = 0; + int num_threads = 1; + bool measure = false; + + if (argc < 8) { + print_usage(); + return 1; + } + + // Parse arguments + for (int i = 1; i < argc; i++) { + if (std::string(argv[i]) == "--load") { + if (i + 1 < argc) { + infile = argv[i+1]; + } else { + print_usage(); + return 1; + } + } else if (std::string(argv[i]) == "--save") { + if (i + 1 < argc) { + outfile = argv[i+1]; + } else { + print_usage(); + return 1; + } + } else if (std::string(argv[i]) == "--mode") { + if (i + 1 < argc) { + if (std::string(argv[i+1]) == "seq") { + mode = Mode::SEQ; + } else if (std::string(argv[i+1]) == "omp") { + mode = Mode::OMP; + } else if (std::string(argv[i+1]) == "ocl") { + mode = Mode::OCL; + } else { + print_usage(); + return 1; + } + } else { + print_usage(); + return 1; + } + } else if (std::string(argv[i]) == "--threads") { + if (i + 1 < argc) { + num_threads = std::stoi(argv[i+1]); + } else { + print_usage(); + return 1; + } + // TODO: This parameter isn't really needed anymore as we only use the GPU now + } else if (std::string(argv[i]) == "--device") { + if (i + 1 < argc) { + if (std::string(argv[i+1]) == "cpu") { + use_gpu = false; + } else if (std::string(argv[i+1]) == "gpu") { + use_gpu = true; + } else { + print_usage(); + return 1; + } + } else { + print_usage(); + return 1; + } + } else if (std::string(argv[i]) == "--generations") { + if (i + 1 < argc) { + num_generations = std::stoi(argv[i+1]); + } else { + print_usage(); + return 1; + } + } else if (std::string(argv[i]) == "--measure") { + measure = true; + } + } + + // If OpenCL was demanded, run that function. + if (mode == Mode::OCL) { + main_opencl(infile, outfile, num_generations, measure); + return 0; + } else { + main_classic(infile, outfile, num_generations, measure, mode); + } + + return 0; }