Apply clang-format
This commit is contained in:
parent
db6c5215df
commit
4fdb56faa6
160
main.cpp
160
main.cpp
@ -1,12 +1,12 @@
|
|||||||
#include <omp.h>
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <omp.h>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#include <OpenCL/cl.hpp>
|
#include <OpenCL/cl.hpp>
|
||||||
#else
|
#else
|
||||||
#include <CL/cl.hpp>
|
#include <CL/cl.hpp>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "Timing.h"
|
#include "Timing.h"
|
||||||
@ -14,18 +14,14 @@
|
|||||||
#define LIVE_CELL 1 // 'x' in the input data
|
#define LIVE_CELL 1 // 'x' in the input data
|
||||||
#define DEAD_CELL 0 // '.' in the input data
|
#define DEAD_CELL 0 // '.' in the input data
|
||||||
|
|
||||||
enum Mode {
|
enum Mode { SEQ, OMP, OCL };
|
||||||
SEQ,
|
|
||||||
OMP,
|
|
||||||
OCL
|
|
||||||
};
|
|
||||||
|
|
||||||
// Using this struct seems to be more performant than just passing
|
// Using this struct seems to be more performant than just passing
|
||||||
// a bool** around functions. However, also adding the neighbor_count
|
// a bool** around functions. However, also adding the neighbor_count
|
||||||
// made performance worse.
|
// made performance worse.
|
||||||
struct World {
|
struct World {
|
||||||
World(int size_x, int size_y) : size_x(size_x), size_y(size_y) {
|
World(int size_x, int size_y) : size_x(size_x), size_y(size_y) {
|
||||||
data = new bool*[size_y];
|
data = new bool *[size_y];
|
||||||
|
|
||||||
for (int y = 0; y < size_y; y++) {
|
for (int y = 0; y < size_y; y++) {
|
||||||
data[y] = new bool[size_x];
|
data[y] = new bool[size_x];
|
||||||
@ -45,31 +41,19 @@ struct World {
|
|||||||
// All following functions are just convenience shorthands.
|
// All following functions are just convenience shorthands.
|
||||||
// They are inlined so it doesn't make a difference in performance.
|
// They are inlined so it doesn't make a difference in performance.
|
||||||
|
|
||||||
inline bool get_value(int x, int y) {
|
inline bool get_value(int x, int y) { return data[y][x]; }
|
||||||
return data[y][x];
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void set_alive(int x, int y) {
|
inline void set_alive(int x, int y) { data[y][x] = LIVE_CELL; }
|
||||||
data[y][x] = LIVE_CELL;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void set_dead(int x, int y) {
|
inline void set_dead(int x, int y) { data[y][x] = DEAD_CELL; }
|
||||||
data[y][x] = DEAD_CELL;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void set(int x, int y, bool val) {
|
inline void set(int x, int y, bool val) { data[y][x] = val; }
|
||||||
data[y][x] = val;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int get_num_neighbors(int left, int right, int up, int down, int x, int y) {
|
inline int get_num_neighbors(int left, int right, int up, int down, int x,
|
||||||
return
|
int y) {
|
||||||
get_value(left, down) +
|
return get_value(left, down) + get_value(x, down) +
|
||||||
get_value(x, down) +
|
get_value(right, down) + get_value(left, y) +
|
||||||
get_value(right, down) +
|
get_value(right, y) + get_value(left, up) + get_value(x, up) +
|
||||||
get_value(left, y) +
|
|
||||||
get_value(right, y) +
|
|
||||||
get_value(left, up) +
|
|
||||||
get_value(x, up) +
|
|
||||||
get_value(right, up);
|
get_value(right, up);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -84,13 +68,15 @@ void generation_omp(World &world, int *neighbor_counts) {
|
|||||||
|
|
||||||
// Set the neighbor count array according to the world.
|
// Set the neighbor count array according to the world.
|
||||||
|
|
||||||
// We handle x == 0 and x == size_x - 1 separately in order to avoid all the constant if checks.
|
// We handle x == 0 and x == size_x - 1 separately in order to avoid all the
|
||||||
|
// constant if checks.
|
||||||
int loop_x = size_x - 1;
|
int loop_x = size_x - 1;
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int y = 0; y < size_y; y++) {
|
for (int y = 0; y < size_y; y++) {
|
||||||
// Wrap y
|
// Wrap y
|
||||||
// This happens rarely enough that this if isn't a huge problem, and it would be tedious
|
// This happens rarely enough that this if isn't a huge problem, and it
|
||||||
|
// would be tedious
|
||||||
// to handle both this and x manually.
|
// to handle both this and x manually.
|
||||||
int up = y - 1;
|
int up = y - 1;
|
||||||
int down = y + 1;
|
int down = y + 1;
|
||||||
@ -101,19 +87,22 @@ void generation_omp(World &world, int *neighbor_counts) {
|
|||||||
down -= size_y;
|
down -= size_y;
|
||||||
|
|
||||||
// Handle x == 0
|
// Handle x == 0
|
||||||
neighbor_counts[y * size_x + 0] = world.get_num_neighbors(loop_x, 1, up, down, 0, y);
|
neighbor_counts[y * size_x + 0] =
|
||||||
|
world.get_num_neighbors(loop_x, 1, up, down, 0, y);
|
||||||
|
|
||||||
// Handle 'normal' x
|
// Handle 'normal' x
|
||||||
for (int x = 1; x < loop_x; x++) {
|
for (int x = 1; x < loop_x; x++) {
|
||||||
neighbor_counts[y * size_x + x] = world.get_num_neighbors(x - 1, x + 1, up, down, x, y);
|
neighbor_counts[y * size_x + x] =
|
||||||
|
world.get_num_neighbors(x - 1, x + 1, up, down, x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle x == loop_x (== size_x - 1, we're just re-using the variable
|
// Handle x == loop_x (== size_x - 1, we're just re-using the variable
|
||||||
neighbor_counts[y * size_x + loop_x] = world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y);
|
neighbor_counts[y * size_x + loop_x] =
|
||||||
|
world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update cells accordingly
|
// Update cells accordingly
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int y = 0; y < world.size_y; y++) {
|
for (int y = 0; y < world.size_y; y++) {
|
||||||
for (int x = 0; x < world.size_x; x++) {
|
for (int x = 0; x < world.size_x; x++) {
|
||||||
char this_cell = world.get_value(x, y);
|
char this_cell = world.get_value(x, y);
|
||||||
@ -131,12 +120,14 @@ void generation_seq(World &world, int *neighbor_counts) {
|
|||||||
|
|
||||||
// Set the neighbor count array according to the world.
|
// Set the neighbor count array according to the world.
|
||||||
|
|
||||||
// We handle x == 0 and x == size_x - 1 separately in order to avoid all the constant if checks.
|
// We handle x == 0 and x == size_x - 1 separately in order to avoid all the
|
||||||
|
// constant if checks.
|
||||||
int loop_x = size_x - 1;
|
int loop_x = size_x - 1;
|
||||||
|
|
||||||
for (int y = 0; y < size_y; y++) {
|
for (int y = 0; y < size_y; y++) {
|
||||||
// Wrap y
|
// Wrap y
|
||||||
// This happens rarely enough that this if isn't a huge problem, and it would be tedious
|
// This happens rarely enough that this if isn't a huge problem, and it
|
||||||
|
// would be tedious
|
||||||
// to handle both this and x manually.
|
// to handle both this and x manually.
|
||||||
int up = y - 1;
|
int up = y - 1;
|
||||||
int down = y + 1;
|
int down = y + 1;
|
||||||
@ -147,15 +138,18 @@ void generation_seq(World &world, int *neighbor_counts) {
|
|||||||
down -= size_y;
|
down -= size_y;
|
||||||
|
|
||||||
// Handle x == 0
|
// Handle x == 0
|
||||||
neighbor_counts[y * size_x + 0] = world.get_num_neighbors(loop_x, 1, up, down, 0, y);
|
neighbor_counts[y * size_x + 0] =
|
||||||
|
world.get_num_neighbors(loop_x, 1, up, down, 0, y);
|
||||||
|
|
||||||
// Handle 'normal' x
|
// Handle 'normal' x
|
||||||
for (int x = 1; x < loop_x; x++) {
|
for (int x = 1; x < loop_x; x++) {
|
||||||
neighbor_counts[y * size_x + x] = world.get_num_neighbors(x - 1, x + 1, up, down, x, y);
|
neighbor_counts[y * size_x + x] =
|
||||||
|
world.get_num_neighbors(x - 1, x + 1, up, down, x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle x == loop_x (== size_x - 1, we're just re-using the variable
|
// Handle x == loop_x (== size_x - 1, we're just re-using the variable
|
||||||
neighbor_counts[y * size_x + loop_x] = world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y);
|
neighbor_counts[y * size_x + loop_x] =
|
||||||
|
world.get_num_neighbors(loop_x - 1, 0, up, down, loop_x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update cells accordingly
|
// Update cells accordingly
|
||||||
@ -170,30 +164,34 @@ void generation_seq(World &world, int *neighbor_counts) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void print_usage() {
|
void print_usage() {
|
||||||
std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl;
|
std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device "
|
||||||
|
"cpu|gpu] --load infile.gol --save outfile.gol --generations "
|
||||||
|
"number [--measure]"
|
||||||
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void main_opencl(std::string infile, std::string outfile, int num_generations, bool measure, bool use_gpu) {
|
void main_opencl(std::string infile, std::string outfile, int num_generations,
|
||||||
|
bool measure, bool use_gpu) {
|
||||||
Timing *timing = Timing::getInstance();
|
Timing *timing = Timing::getInstance();
|
||||||
|
|
||||||
// Get Nvidia CUDA platform
|
// Get Nvidia CUDA platform
|
||||||
std::vector<cl::Platform> all_platforms;
|
std::vector<cl::Platform> all_platforms;
|
||||||
cl::Platform::get(&all_platforms);
|
cl::Platform::get(&all_platforms);
|
||||||
|
|
||||||
if (all_platforms.size()==0) {
|
if (all_platforms.size() == 0) {
|
||||||
std::cout<<" No platforms found. Check OpenCL installation!\n";
|
std::cout << " No platforms found. Check OpenCL installation!\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
cl::Platform default_platform=all_platforms[0];
|
cl::Platform default_platform = all_platforms[0];
|
||||||
|
|
||||||
// Use the first device (in my case, GPU is on this platform)
|
// Use the first device (in my case, GPU is on this platform)
|
||||||
std::vector<cl::Device> all_devices;
|
std::vector<cl::Device> all_devices;
|
||||||
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
|
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
|
||||||
if(all_devices.size()==0){
|
if (all_devices.size() == 0) {
|
||||||
std::cout<<" No devices found. Check OpenCL installation!\n";
|
std::cout << " No devices found. Check OpenCL installation!\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
cl::Device default_device=all_devices[0];
|
cl::Device default_device = all_devices[0];
|
||||||
|
|
||||||
// The context links device and platform
|
// The context links device and platform
|
||||||
cl::Context context({default_device});
|
cl::Context context({default_device});
|
||||||
@ -201,7 +199,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
|
|||||||
// Load kernel code from file into Sources
|
// Load kernel code from file into Sources
|
||||||
cl::Program::Sources sources;
|
cl::Program::Sources sources;
|
||||||
|
|
||||||
std::ifstream file("gol.cl"); //taking file as inputstream
|
std::ifstream file("gol.cl"); // taking file as inputstream
|
||||||
std::string kernel_code;
|
std::string kernel_code;
|
||||||
|
|
||||||
if (file) {
|
if (file) {
|
||||||
@ -216,7 +214,9 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
|
|||||||
// Create a program with the previously defined context and (kernel) sources
|
// Create a program with the previously defined context and (kernel) sources
|
||||||
cl::Program program(context, sources);
|
cl::Program program(context, sources);
|
||||||
if (program.build({default_device}) != CL_SUCCESS) {
|
if (program.build({default_device}) != CL_SUCCESS) {
|
||||||
std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << std::endl;
|
std::cout << "Error building: "
|
||||||
|
<< program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device)
|
||||||
|
<< std::endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -267,11 +267,13 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
|
|||||||
cl::CommandQueue queue(context, default_device);
|
cl::CommandQueue queue(context, default_device);
|
||||||
|
|
||||||
// Push write commands to queue
|
// Push write commands to queue
|
||||||
queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world);
|
queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n,
|
||||||
|
world);
|
||||||
queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
|
queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
|
||||||
queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size);
|
queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size);
|
||||||
|
|
||||||
// Create the kernel, which uses the `generation` method in our program (which was created from the kernel code)
|
// Create the kernel, which uses the `generation` method in our program
|
||||||
|
// (which was created from the kernel code)
|
||||||
cl::Kernel gol_kernel(program, "generation");
|
cl::Kernel gol_kernel(program, "generation");
|
||||||
|
|
||||||
timing->stopSetup();
|
timing->stopSetup();
|
||||||
@ -285,11 +287,14 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
|
|||||||
gol_kernel.setArg(2, buffer_size);
|
gol_kernel.setArg(2, buffer_size);
|
||||||
|
|
||||||
// Run it
|
// Run it
|
||||||
queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange);
|
queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n),
|
||||||
|
cl::NullRange);
|
||||||
queue.finish();
|
queue.finish();
|
||||||
|
|
||||||
// Swap the previous buffer with the new buffer, as we will want to use our result from this loop
|
// Swap the previous buffer with the new buffer, as we will want to use
|
||||||
// as the input of the next loop (overwriting the previous result, which is not needed anymore)
|
// our result from this loop
|
||||||
|
// as the input of the next loop (overwriting the previous result,
|
||||||
|
// which is not needed anymore)
|
||||||
std::swap(buffer_previous, buffer_new);
|
std::swap(buffer_previous, buffer_new);
|
||||||
}
|
}
|
||||||
queue.finish();
|
queue.finish();
|
||||||
@ -297,12 +302,15 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
|
|||||||
timing->stopComputation();
|
timing->stopComputation();
|
||||||
timing->startFinalization();
|
timing->startFinalization();
|
||||||
|
|
||||||
// Since we swap after every generation, we need to proceed differently depending on
|
// Since we swap after every generation, we need to proceed differently
|
||||||
|
// depending on
|
||||||
// whether we're in swapped mode or not at the moment
|
// whether we're in swapped mode or not at the moment
|
||||||
if (num_generations % 2 == 0) {
|
if (num_generations % 2 == 0) {
|
||||||
queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, result);
|
queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n,
|
||||||
|
result);
|
||||||
} else {
|
} else {
|
||||||
queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
|
queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n,
|
||||||
|
result);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write the result
|
// Write the result
|
||||||
@ -330,7 +338,8 @@ void main_opencl(std::string infile, std::string outfile, int num_generations, b
|
|||||||
timing->stopFinalization();
|
timing->stopFinalization();
|
||||||
}
|
}
|
||||||
|
|
||||||
void main_classic(std::string infile, std::string outfile, int num_generations, bool measure, Mode mode) {
|
void main_classic(std::string infile, std::string outfile, int num_generations,
|
||||||
|
bool measure, Mode mode) {
|
||||||
Timing *timing = Timing::getInstance();
|
Timing *timing = Timing::getInstance();
|
||||||
|
|
||||||
// Read in the start state
|
// Read in the start state
|
||||||
@ -409,7 +418,7 @@ void main_classic(std::string infile, std::string outfile, int num_generations,
|
|||||||
timing->stopFinalization();
|
timing->stopFinalization();
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
Timing *timing = Timing::getInstance();
|
Timing *timing = Timing::getInstance();
|
||||||
|
|
||||||
// Setup.
|
// Setup.
|
||||||
@ -432,25 +441,25 @@ int main(int argc, char* argv[]) {
|
|||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
if (std::string(argv[i]) == "--load") {
|
if (std::string(argv[i]) == "--load") {
|
||||||
if (i + 1 < argc) {
|
if (i + 1 < argc) {
|
||||||
infile = argv[i+1];
|
infile = argv[i + 1];
|
||||||
} else {
|
} else {
|
||||||
print_usage();
|
print_usage();
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
} else if (std::string(argv[i]) == "--save") {
|
} else if (std::string(argv[i]) == "--save") {
|
||||||
if (i + 1 < argc) {
|
if (i + 1 < argc) {
|
||||||
outfile = argv[i+1];
|
outfile = argv[i + 1];
|
||||||
} else {
|
} else {
|
||||||
print_usage();
|
print_usage();
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
} else if (std::string(argv[i]) == "--mode") {
|
} else if (std::string(argv[i]) == "--mode") {
|
||||||
if (i + 1 < argc) {
|
if (i + 1 < argc) {
|
||||||
if (std::string(argv[i+1]) == "seq") {
|
if (std::string(argv[i + 1]) == "seq") {
|
||||||
mode = Mode::SEQ;
|
mode = Mode::SEQ;
|
||||||
} else if (std::string(argv[i+1]) == "omp") {
|
} else if (std::string(argv[i + 1]) == "omp") {
|
||||||
mode = Mode::OMP;
|
mode = Mode::OMP;
|
||||||
} else if (std::string(argv[i+1]) == "ocl") {
|
} else if (std::string(argv[i + 1]) == "ocl") {
|
||||||
mode = Mode::OCL;
|
mode = Mode::OCL;
|
||||||
} else {
|
} else {
|
||||||
print_usage();
|
print_usage();
|
||||||
@ -462,17 +471,18 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
} else if (std::string(argv[i]) == "--threads") {
|
} else if (std::string(argv[i]) == "--threads") {
|
||||||
if (i + 1 < argc) {
|
if (i + 1 < argc) {
|
||||||
omp_set_num_threads(std::stoi(argv[i+1]));
|
omp_set_num_threads(std::stoi(argv[i + 1]));
|
||||||
} else {
|
} else {
|
||||||
print_usage();
|
print_usage();
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
// TODO: This parameter isn't really needed anymore as we only use the GPU now
|
// TODO: This parameter isn't really needed anymore as we only use
|
||||||
|
// the GPU now
|
||||||
} else if (std::string(argv[i]) == "--device") {
|
} else if (std::string(argv[i]) == "--device") {
|
||||||
if (i + 1 < argc) {
|
if (i + 1 < argc) {
|
||||||
if (std::string(argv[i+1]) == "cpu") {
|
if (std::string(argv[i + 1]) == "cpu") {
|
||||||
use_gpu = false;
|
use_gpu = false;
|
||||||
} else if (std::string(argv[i+1]) == "gpu") {
|
} else if (std::string(argv[i + 1]) == "gpu") {
|
||||||
use_gpu = true;
|
use_gpu = true;
|
||||||
} else {
|
} else {
|
||||||
print_usage();
|
print_usage();
|
||||||
@ -484,7 +494,7 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
} else if (std::string(argv[i]) == "--generations") {
|
} else if (std::string(argv[i]) == "--generations") {
|
||||||
if (i + 1 < argc) {
|
if (i + 1 < argc) {
|
||||||
num_generations = std::stoi(argv[i+1]);
|
num_generations = std::stoi(argv[i + 1]);
|
||||||
} else {
|
} else {
|
||||||
print_usage();
|
print_usage();
|
||||||
return 1;
|
return 1;
|
||||||
@ -501,9 +511,7 @@ int main(int argc, char* argv[]) {
|
|||||||
main_classic(infile, outfile, num_generations, measure, mode);
|
main_classic(infile, outfile, num_generations, measure, mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (measure) {
|
if (measure) { std::cout << timing->getResults() << std::endl; }
|
||||||
std::cout << timing->getResults() << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user