Compare commits
3 Commits
d17e057912
...
a4d0a5dca8
Author | SHA1 | Date | |
---|---|---|---|
a4d0a5dca8 | |||
fddb092a56 | |||
62ce8f1b06 |
4
Makefile
4
Makefile
@ -1,8 +1,8 @@
|
|||||||
CXX = g++
|
CXX = g++
|
||||||
CXXFLAGS = -fopenmp -Wall -O3
|
CXXFLAGS = -fopenmp -Wall -O3 -g
|
||||||
|
|
||||||
gol: main.o Timing.o
|
gol: main.o Timing.o
|
||||||
$(CXX) $(CXXFLAGS) -o gol main.o Timing.o
|
$(CXX) $(CXXFLAGS) -o gol main.o Timing.o -lOpenCL
|
||||||
|
|
||||||
main.o: main.cpp Timing.h
|
main.o: main.cpp Timing.h
|
||||||
$(CXX) $(CXXFLAGS) -c main.cpp
|
$(CXX) $(CXXFLAGS) -c main.cpp
|
||||||
|
30
gol.cl
Normal file
30
gol.cl
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
// Computes one Game of Life generation on a grid whose edges wrap around.
//
// Each work-item updates exactly one cell; the cell is selected by the
// work-item's global id in dimension 0, interpreted as a row-major index.
//
//   previous  cell states of the current generation, row-major, size[0] * size[1] entries
//   new       receives the cell states of the next generation, same layout
//   size      size[0] is the grid width, size[1] is the grid height
//
// NOTE(review): the host passes arrays of C++ `bool`; this relies on host and
// device agreeing on the size of `bool` -- confirm for the targeted devices.
void kernel generation(global const bool *previous, global bool *new, global const int *size) {
    // Read the dimensions once instead of re-reading global memory in every expression.
    const int width = size[0];
    const int height = size[1];

    const int id = get_global_id(0);

    // Work-items outside the grid (e.g. when the host rounds the global size up)
    // must neither read nor write any cell.
    if (id >= width * height) {
        return;
    }

    const int x = id % width;
    const int y = id / width;

    // Neighbor coordinates; adding the dimension before the modulo keeps the
    // result non-negative when wrapping past the left/top edge.
    const int left = (x - 1 + width) % width;
    const int right = (x + 1) % width;
    const int up = (y - 1 + height) % height;
    const int down = (y + 1) % height;

    // Row offsets into the flat arrays
    const int row_up = width * up;
    const int row_mid = width * y;
    const int row_down = width * down;

    // Get the number of neighbors: cells are 0 or 1, so summing them counts the live ones
    const int neighbors =
          previous[row_up + left]
        + previous[row_up + x]
        + previous[row_up + right]

        + previous[row_mid + left]
        + previous[row_mid + right]

        + previous[row_down + left]
        + previous[row_down + x]
        + previous[row_down + right];

    // Update cell: it is set with exactly 3 neighbors, keeps its previous
    // state with exactly 2 neighbors, and is cleared otherwise.
    new[row_mid + x] = (neighbors == 3) || (previous[row_mid + x] && (neighbors == 2));
}
|
154
main.cpp
154
main.cpp
@ -1,6 +1,13 @@
|
|||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <OpenCL/cl.hpp>
|
||||||
|
#else
|
||||||
|
#include <CL/cl.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "Timing.h"
|
#include "Timing.h"
|
||||||
|
|
||||||
@ -166,6 +173,148 @@ void print_usage() {
|
|||||||
std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl;
|
std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void main_opencl(std::string infile, std::string outfile, int num_generations) {
|
||||||
|
// get all platforms (drivers), e.g. NVIDIA
|
||||||
|
std::vector<cl::Platform> all_platforms;
|
||||||
|
cl::Platform::get(&all_platforms);
|
||||||
|
|
||||||
|
if (all_platforms.size()==0) {
|
||||||
|
std::cout<<" No platforms found. Check OpenCL installation!\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
cl::Platform default_platform=all_platforms[0];
|
||||||
|
std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<"\n";
|
||||||
|
|
||||||
|
// get default device (CPUs, GPUs) of the default platform
|
||||||
|
std::vector<cl::Device> all_devices;
|
||||||
|
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
|
||||||
|
if(all_devices.size()==0){
|
||||||
|
std::cout<<" No devices found. Check OpenCL installation!\n";
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// use device[1] because that's a GPU; device[0] is the CPU
|
||||||
|
cl::Device default_device=all_devices[0];
|
||||||
|
std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<"\n";
|
||||||
|
|
||||||
|
// a context is like a "runtime link" to the device and platform;
|
||||||
|
// i.e. communication is possible
|
||||||
|
cl::Context context({default_device});
|
||||||
|
|
||||||
|
// create the program that we want to execute on the device
|
||||||
|
cl::Program::Sources sources;
|
||||||
|
|
||||||
|
// load kernel from file
|
||||||
|
std::ifstream file("gol.cl"); //taking file as inputstream
|
||||||
|
std::string kernel_code;
|
||||||
|
|
||||||
|
if (file) {
|
||||||
|
std::ostringstream ss;
|
||||||
|
ss << file.rdbuf();
|
||||||
|
kernel_code = ss.str();
|
||||||
|
} else {
|
||||||
|
std::cout << "Error: Couldn't read Kernel source!" << std::endl;
|
||||||
|
}
|
||||||
|
sources.push_back({kernel_code.c_str(), kernel_code.length()});
|
||||||
|
|
||||||
|
cl::Program program(context, sources);
|
||||||
|
if (program.build({default_device}) != CL_SUCCESS) {
|
||||||
|
std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << std::endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setup on CPU: Load files
|
||||||
|
|
||||||
|
// Read in the start state
|
||||||
|
std::ifstream world_file;
|
||||||
|
world_file.open(infile);
|
||||||
|
|
||||||
|
// Get x and y size
|
||||||
|
std::string x_str, y_str;
|
||||||
|
getline(world_file, x_str, ',');
|
||||||
|
getline(world_file, y_str);
|
||||||
|
|
||||||
|
int size_x = std::stoi(x_str);
|
||||||
|
int size_y = std::stoi(y_str);
|
||||||
|
|
||||||
|
bool *world = new bool[size_x * size_y];
|
||||||
|
bool *result = new bool[size_x * size_y];
|
||||||
|
|
||||||
|
// Set the data
|
||||||
|
for (int y = 0; y < size_y; y++) {
|
||||||
|
std::string line;
|
||||||
|
getline(world_file, line);
|
||||||
|
|
||||||
|
for (int x = 0; x < size_x; x++) {
|
||||||
|
// The chars '.' and 'x' are mapped to the booleans 0 and 1.
|
||||||
|
// This speeds up the calculation of the neighbors -- no if-checks
|
||||||
|
// needed, just sum the values.
|
||||||
|
world[y * size_x + x] = 1 ? line[x] == 'x' : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
world_file.close();
|
||||||
|
|
||||||
|
// Put size into array
|
||||||
|
int size[2] = {size_x, size_y};
|
||||||
|
int n = size_x * size_y;
|
||||||
|
|
||||||
|
// create buffers on device (allocate space on GPU)
|
||||||
|
cl::Buffer buffer_previous(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
|
||||||
|
cl::Buffer buffer_new(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
|
||||||
|
cl::Buffer buffer_size(context, CL_MEM_READ_WRITE, sizeof(int) * 2);
|
||||||
|
|
||||||
|
// create a queue (a queue of commands that the GPU will execute)
|
||||||
|
cl::CommandQueue queue(context, default_device);
|
||||||
|
|
||||||
|
// push write commands to queue
|
||||||
|
queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world);
|
||||||
|
queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
|
||||||
|
queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size);
|
||||||
|
|
||||||
|
// RUN ZE KERNEL
|
||||||
|
cl::Kernel gol_kernel(program, "generation");
|
||||||
|
|
||||||
|
for (int i = 0; i < num_generations; i++) {
|
||||||
|
gol_kernel.setArg(0, buffer_previous);
|
||||||
|
gol_kernel.setArg(1, buffer_new);
|
||||||
|
gol_kernel.setArg(2, buffer_size);
|
||||||
|
|
||||||
|
queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange);
|
||||||
|
queue.finish();
|
||||||
|
|
||||||
|
std::swap(buffer_previous, buffer_new);
|
||||||
|
}
|
||||||
|
queue.finish();
|
||||||
|
|
||||||
|
// Since we swap after every generation, we need to proceed differently depending on
|
||||||
|
// whether we're in swapped mode or not at the moment
|
||||||
|
if (num_generations % 2 == 0) {
|
||||||
|
queue.enqueueReadBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, result);
|
||||||
|
} else {
|
||||||
|
queue.enqueueReadBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the result
|
||||||
|
std::ofstream result_file;
|
||||||
|
result_file.open(outfile);
|
||||||
|
|
||||||
|
result_file << size_x << "," << size_y << '\n';
|
||||||
|
|
||||||
|
for (int y = 0; y < size_y; y++) {
|
||||||
|
std::string line;
|
||||||
|
|
||||||
|
for (int x = 0; x < size_x; x++) {
|
||||||
|
// Convert 1 and 0 to 'x' and '.' again
|
||||||
|
line += result[y * size_x + x] ? 'x' : '.';
|
||||||
|
}
|
||||||
|
|
||||||
|
result_file << line << '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
result_file.close();
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
Timing *timing = Timing::getInstance();
|
Timing *timing = Timing::getInstance();
|
||||||
|
|
||||||
@ -266,6 +415,11 @@ int main(int argc, char* argv[]) {
|
|||||||
std::cout << "Running OpenCL version" << std::endl;
|
std::cout << "Running OpenCL version" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mode == Mode::OCL) {
|
||||||
|
main_opencl(infile, outfile, num_generations);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Read in the start state
|
// Read in the start state
|
||||||
std::ifstream world_file;
|
std::ifstream world_file;
|
||||||
world_file.open(infile);
|
world_file.open(infile);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user