Apply clang-format

This commit is contained in:
karl 2020-12-16 20:00:12 +01:00
parent db6c5215df
commit 4fdb56faa6

190
main.cpp
View File

@ -1,31 +1,27 @@
#include <omp.h>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <omp.h>
#include <sstream> #include <sstream>
#ifdef __APPLE__ #ifdef __APPLE__
#include <OpenCL/cl.hpp> #include <OpenCL/cl.hpp>
#else #else
#include <CL/cl.hpp> #include <CL/cl.hpp>
#endif #endif
#include "Timing.h" #include "Timing.h"
#define LIVE_CELL 1 // 'x' in the input data #define LIVE_CELL 1 // 'x' in the input data
#define DEAD_CELL 0 // '.' in the input data #define DEAD_CELL 0 // '.' in the input data
enum Mode { enum Mode { SEQ, OMP, OCL };
SEQ,
OMP,
OCL
};
// Using this struct seems to be more performant than just passing // Using this struct seems to be more performant than just passing
// a bool** around functions. However, also adding the neighbor_count // a bool** around functions. However, also adding the neighbor_count
// made performance worse. // made performance worse.
struct World { struct World {
World(int size_x, int size_y) : size_x(size_x), size_y(size_y) { World(int size_x, int size_y) : size_x(size_x), size_y(size_y) {
data = new bool*[size_y]; data = new bool *[size_y];
for (int y = 0; y < size_y; y++) { for (int y = 0; y < size_y; y++) {
data[y] = new bool[size_x]; data[y] = new bool[size_x];
@ -36,7 +32,7 @@ struct World {
for (int y = 0; y < size_y; y++) { for (int y = 0; y < size_y; y++) {
delete data[y]; delete data[y];
} }
delete data; delete data;
} }
@ -44,33 +40,21 @@ struct World {
// All following functions are just convenience shorthands. // All following functions are just convenience shorthands.
// They are inlined so it doesn't make a difference in performance. // They are inlined so it doesn't make a difference in performance.
inline bool get_value(int x, int y) {
return data[y][x];
}
inline void set_alive(int x, int y) { inline bool get_value(int x, int y) { return data[y][x]; }
data[y][x] = LIVE_CELL;
}
inline void set_dead(int x, int y) { inline void set_alive(int x, int y) { data[y][x] = LIVE_CELL; }
data[y][x] = DEAD_CELL;
}
inline void set(int x, int y, bool val) { inline void set_dead(int x, int y) { data[y][x] = DEAD_CELL; }
data[y][x] = val;
}
inline int get_num_neighbors(int left, int right, int up, int down, int x, int y) { inline void set(int x, int y, bool val) { data[y][x] = val; }
return
get_value(left, down) + inline int get_num_neighbors(int left, int right, int up, int down, int x,
get_value(x, down) + int y) {
get_value(right, down) + return get_value(left, down) + get_value(x, down) +
get_value(left, y) + get_value(right, down) + get_value(left, y) +
get_value(right, y) + get_value(right, y) + get_value(left, up) + get_value(x, up) +
get_value(left, up) + get_value(right, up);
get_value(x, up) +
get_value(right, up);
} }
int size_x; int size_x;
@ -84,13 +68,15 @@ void generation_omp(World &world, int *neighbor_counts) {
// Set the neighbor count array according to the world. // Set the neighbor count array according to the world.
// We handle x == 0 and x == size_x - 1 separately in order to avoid all the constant if checks. // We handle x == 0 and x == size_x - 1 separately in order to avoid all the
// constant if checks.
int loop_x = size_x - 1; int loop_x = size_x - 1;
#pragma omp parallel for #pragma omp parallel for
for (int y = 0; y < size_y; y++) { for (int y = 0; y < size_y; y++) {
// Wrap y // Wrap y
// This happens rarely enough that this if isn't a huge problem, and it would be tedious // This happens rarely enough that this if isn't a huge problem, and it
// would be tedious
// to handle both this and x manually. // to handle both this and x manually.
int up = y - 1; int up = y - 1;
int down = y + 1; int down = y + 1;
@ -101,19 +87,22 @@ void generation_omp(World &world, int *neighbor_counts) {
down -= size_y; down -= size_y;
// Handle x == 0 // Handle x == 0
neighbor_counts[y * size_x + 0] = world.get_num_neighbors(loop_x, 1, up, down, 0, y); neighbor_counts[y * size_x + 0] =
world.get_num_neighbors(loop_x, 1, up, down, 0, y);
// Handle 'normal' x // Handle 'normal' x
for (int x = 1; x < loop_x; x++) { for (int x = 1; x < loop_x; x++) {
neighbor_counts[y * size_x + x] = world.get_num_neighbors(x - 1, x + 1, up, down, x, y); neighbor_counts[y * size_x + x] =
world.get_num_neighbors(x - 1, x + 1, up, down, x, y);
} }
// Handle x == loop_x (== size_x - 1, we're just re-using the variable) // Handle x == loop_x (== size_x - 1, we're just re-using the variable)
neighbor_counts[y * size_x + loop_x] = world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y); neighbor_counts[y * size_x + loop_x] =
world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y);
} }
// Update cells accordingly // Update cells accordingly
#pragma omp parallel for #pragma omp parallel for
for (int y = 0; y < world.size_y; y++) { for (int y = 0; y < world.size_y; y++) {
for (int x = 0; x < world.size_x; x++) { for (int x = 0; x < world.size_x; x++) {
char this_cell = world.get_value(x, y); char this_cell = world.get_value(x, y);
@ -131,12 +120,14 @@ void generation_seq(World &world, int *neighbor_counts) {
// Set the neighbor count array according to the world. // Set the neighbor count array according to the world.
// We handle x == 0 and x == size_x - 1 separately in order to avoid all the constant if checks. // We handle x == 0 and x == size_x - 1 separately in order to avoid all the
// constant if checks.
int loop_x = size_x - 1; int loop_x = size_x - 1;
for (int y = 0; y < size_y; y++) { for (int y = 0; y < size_y; y++) {
// Wrap y // Wrap y
// This happens rarely enough that this if isn't a huge problem, and it would be tedious // This happens rarely enough that this if isn't a huge problem, and it
// would be tedious
// to handle both this and x manually. // to handle both this and x manually.
int up = y - 1; int up = y - 1;
int down = y + 1; int down = y + 1;
@ -147,15 +138,18 @@ void generation_seq(World &world, int *neighbor_counts) {
down -= size_y; down -= size_y;
// Handle x == 0 // Handle x == 0
neighbor_counts[y * size_x + 0] = world.get_num_neighbors(loop_x, 1, up, down, 0, y); neighbor_counts[y * size_x + 0] =
world.get_num_neighbors(loop_x, 1, up, down, 0, y);
// Handle 'normal' x // Handle 'normal' x
for (int x = 1; x < loop_x; x++) { for (int x = 1; x < loop_x; x++) {
neighbor_counts[y * size_x + x] = world.get_num_neighbors(x - 1, x + 1, up, down, x, y); neighbor_counts[y * size_x + x] =
world.get_num_neighbors(x - 1, x + 1, up, down, x, y);
} }
// Handle x == loop_x (== size_x - 1, we're just re-using the variable) // Handle x == loop_x (== size_x - 1, we're just re-using the variable)
neighbor_counts[y * size_x + loop_x] = world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y); neighbor_counts[y * size_x + loop_x] =
world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y);
} }
// Update cells accordingly // Update cells accordingly
@ -170,30 +164,34 @@ void generation_seq(World &world, int *neighbor_counts) {
} }
void print_usage() { void print_usage() {
std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl; std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device "
"cpu|gpu] --load infile.gol --save outfile.gol --generations "
"number [--measure]"
<< std::endl;
} }
void main_opencl(std::string infile, std::string outfile, int num_generations, bool measure, bool use_gpu) { void main_opencl(std::string infile, std::string outfile, int num_generations,
bool measure, bool use_gpu) {
Timing *timing = Timing::getInstance(); Timing *timing = Timing::getInstance();
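// Use the first OpenCL platform found (expected to be the Nvidia CUDA platform) // Use the first OpenCL platform found (expected to be the Nvidia CUDA platform)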
std::vector<cl::Platform> all_platforms; std::vector<cl::Platform> all_platforms;
cl::Platform::get(&all_platforms); cl::Platform::get(&all_platforms);
if (all_platforms.size()==0) { if (all_platforms.size() == 0) {
std::cout<<" No platforms found. Check OpenCL installation!\n"; std::cout << " No platforms found. Check OpenCL installation!\n";
exit(1); exit(1);
} }
cl::Platform default_platform=all_platforms[0]; cl::Platform default_platform = all_platforms[0];
// Use the first device (in my case, GPU is on this platform) // Use the first device (in my case, GPU is on this platform)
std::vector<cl::Device> all_devices; std::vector<cl::Device> all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices); default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if(all_devices.size()==0){ if (all_devices.size() == 0) {
std::cout<<" No devices found. Check OpenCL installation!\n"; std::cout << " No devices found. Check OpenCL installation!\n";
exit(1); exit(1);
} }
cl::Device default_device=all_devices[0]; cl::Device default_device = all_devices[0];
// The context links device and platform // The context links device and platform
cl::Context context({default_device}); cl::Context context({default_device});
@ -201,9 +199,9 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
// Load kernel code from file into Sources // Load kernel code from file into Sources
cl::Program::Sources sources; cl::Program::Sources sources;
std::ifstream file("gol.cl"); //taking file as inputstream std::ifstream file("gol.cl"); // taking file as inputstream
std::string kernel_code; std::string kernel_code;
if (file) { if (file) {
std::ostringstream ss; std::ostringstream ss;
ss << file.rdbuf(); ss << file.rdbuf();
@ -216,7 +214,9 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
// Create a program with the previously defined context and (kernel) sources // Create a program with the previously defined context and (kernel) sources
cl::Program program(context, sources); cl::Program program(context, sources);
if (program.build({default_device}) != CL_SUCCESS) { if (program.build({default_device}) != CL_SUCCESS) {
std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << std::endl; std::cout << "Error building: "
<< program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device)
<< std::endl;
exit(1); exit(1);
} }
@ -251,7 +251,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
world[y * size_x + x] = 1 ? line[x] == 'x' : 0; world[y * size_x + x] = 1 ? line[x] == 'x' : 0;
} }
} }
world_file.close(); world_file.close();
// Put the size into an array so it can be passed to the kernel // Put the size into an array so it can be passed to the kernel
@ -267,11 +267,13 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
cl::CommandQueue queue(context, default_device); cl::CommandQueue queue(context, default_device);
// Push write commands to queue // Push write commands to queue
queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world); queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n,
world);
queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result); queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size); queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size);
// Create the kernel, which uses the `generation` method in our program (which was created from the kernel code) // Create the kernel, which uses the `generation` method in our program
// (which was created from the kernel code)
cl::Kernel gol_kernel(program, "generation"); cl::Kernel gol_kernel(program, "generation");
timing->stopSetup(); timing->stopSetup();
@ -285,11 +287,14 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
gol_kernel.setArg(2, buffer_size); gol_kernel.setArg(2, buffer_size);
// Run it // Run it
queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange); queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n),
cl::NullRange);
queue.finish(); queue.finish();
// Swap the previous buffer with the new buffer, as we will want to use our result from this loop // Swap the previous buffer with the new buffer, as we will want to use
// as the input of the next loop (overwriting the previous result, which is not needed anymore) // our result from this loop
// as the input of the next loop (overwriting the previous result,
// which is not needed anymore)
std::swap(buffer_previous, buffer_new); std::swap(buffer_previous, buffer_new);
} }
queue.finish(); queue.finish();
@ -297,12 +302,15 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
timing->stopComputation(); timing->stopComputation();
timing->startFinalization(); timing->startFinalization();
// Since we swap after every generation, we need to proceed differently depending on // Since we swap after every generation, we need to proceed differently
// depending on
// whether we're in swapped mode or not at the moment // whether we're in swapped mode or not at the moment
if (num_generations % 2 == 0) { if (num_generations % 2 == 0) {
queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, result); queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n,
result);
} else { } else {
queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result); queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n,
result);
} }
// Write the result // Write the result
@ -310,7 +318,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
result_file.open(outfile); result_file.open(outfile);
result_file << size_x << "," << size_y << '\n'; result_file << size_x << "," << size_y << '\n';
for (int y = 0; y < size_y; y++) { for (int y = 0; y < size_y; y++) {
std::string line; std::string line;
@ -330,7 +338,8 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
timing->stopFinalization(); timing->stopFinalization();
} }
void main_classic(std::string infile, std::string outfile, int num_generations, bool measure, Mode mode) { void main_classic(std::string infile, std::string outfile, int num_generations,
bool measure, Mode mode) {
Timing *timing = Timing::getInstance(); Timing *timing = Timing::getInstance();
// Read in the start state // Read in the start state
@ -359,7 +368,7 @@ void main_classic(std::string infile, std::string outfile, int num_generations,
world.set(x, y, 1 ? line[x] == 'x' : 0); world.set(x, y, 1 ? line[x] == 'x' : 0);
} }
} }
world_file.close(); world_file.close();
// In this separate array, we keep track of how many live neighbors // In this separate array, we keep track of how many live neighbors
@ -390,7 +399,7 @@ void main_classic(std::string infile, std::string outfile, int num_generations,
result_file.open(outfile); result_file.open(outfile);
result_file << size_x << "," << size_y << '\n'; result_file << size_x << "," << size_y << '\n';
for (int y = 0; y < size_y; y++) { for (int y = 0; y < size_y; y++) {
std::string line; std::string line;
getline(world_file, line); getline(world_file, line);
@ -409,7 +418,7 @@ void main_classic(std::string infile, std::string outfile, int num_generations,
timing->stopFinalization(); timing->stopFinalization();
} }
int main(int argc, char* argv[]) { int main(int argc, char *argv[]) {
Timing *timing = Timing::getInstance(); Timing *timing = Timing::getInstance();
// Setup. // Setup.
@ -432,25 +441,25 @@ int main(int argc, char* argv[]) {
for (int i = 1; i < argc; i++) { for (int i = 1; i < argc; i++) {
if (std::string(argv[i]) == "--load") { if (std::string(argv[i]) == "--load") {
if (i + 1 < argc) { if (i + 1 < argc) {
infile = argv[i+1]; infile = argv[i + 1];
} else { } else {
print_usage(); print_usage();
return 1; return 1;
} }
} else if (std::string(argv[i]) == "--save") { } else if (std::string(argv[i]) == "--save") {
if (i + 1 < argc) { if (i + 1 < argc) {
outfile = argv[i+1]; outfile = argv[i + 1];
} else { } else {
print_usage(); print_usage();
return 1; return 1;
} }
} else if (std::string(argv[i]) == "--mode") { } else if (std::string(argv[i]) == "--mode") {
if (i + 1 < argc) { if (i + 1 < argc) {
if (std::string(argv[i+1]) == "seq") { if (std::string(argv[i + 1]) == "seq") {
mode = Mode::SEQ; mode = Mode::SEQ;
} else if (std::string(argv[i+1]) == "omp") { } else if (std::string(argv[i + 1]) == "omp") {
mode = Mode::OMP; mode = Mode::OMP;
} else if (std::string(argv[i+1]) == "ocl") { } else if (std::string(argv[i + 1]) == "ocl") {
mode = Mode::OCL; mode = Mode::OCL;
} else { } else {
print_usage(); print_usage();
@ -462,17 +471,18 @@ int main(int argc, char* argv[]) {
} }
} else if (std::string(argv[i]) == "--threads") { } else if (std::string(argv[i]) == "--threads") {
if (i + 1 < argc) { if (i + 1 < argc) {
omp_set_num_threads(std::stoi(argv[i+1])); omp_set_num_threads(std::stoi(argv[i + 1]));
} else { } else {
print_usage(); print_usage();
return 1; return 1;
} }
// TODO: This parameter isn't really needed anymore as we only use the GPU now // TODO: This parameter isn't really needed anymore as we only use
// the GPU now
} else if (std::string(argv[i]) == "--device") { } else if (std::string(argv[i]) == "--device") {
if (i + 1 < argc) { if (i + 1 < argc) {
if (std::string(argv[i+1]) == "cpu") { if (std::string(argv[i + 1]) == "cpu") {
use_gpu = false; use_gpu = false;
} else if (std::string(argv[i+1]) == "gpu") { } else if (std::string(argv[i + 1]) == "gpu") {
use_gpu = true; use_gpu = true;
} else { } else {
print_usage(); print_usage();
@ -484,11 +494,11 @@ int main(int argc, char* argv[]) {
} }
} else if (std::string(argv[i]) == "--generations") { } else if (std::string(argv[i]) == "--generations") {
if (i + 1 < argc) { if (i + 1 < argc) {
num_generations = std::stoi(argv[i+1]); num_generations = std::stoi(argv[i + 1]);
} else { } else {
print_usage(); print_usage();
return 1; return 1;
} }
} else if (std::string(argv[i]) == "--measure") { } else if (std::string(argv[i]) == "--measure") {
measure = true; measure = true;
} }
@ -501,9 +511,7 @@ int main(int argc, char* argv[]) {
main_classic(infile, outfile, num_generations, measure, mode); main_classic(infile, outfile, num_generations, measure, mode);
} }
if (measure) { if (measure) { std::cout << timing->getResults() << std::endl; }
std::cout << timing->getResults() << std::endl;
}
return 0; return 0;
} }