From 7bcc724d8ec16640854ece50dc690bd0b91f2638 Mon Sep 17 00:00:00 2001
From: karl <karl.bittner@hexaquo.at>
Date: Wed, 16 Dec 2020 19:17:07 +0100
Subject: [PATCH] Improve code readability

Update comments, move the sequential/OpenMP code path into its own function (main_classic), and add timing/--measure output to the OpenCL path.
---
 main.cpp | 249 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 130 insertions(+), 119 deletions(-)

diff --git a/main.cpp b/main.cpp
index 00c01cf..286694c 100644
--- a/main.cpp
+++ b/main.cpp
@@ -173,8 +173,10 @@ void print_usage() {
     std::cerr << "Usage: gol --mode seq|omp|ocl [--threads number] [--device cpu|gpu] --load infile.gol --save outfile.gol --generations number [--measure]" << std::endl;
 }
 
-void main_opencl(std::string infile, std::string outfile, int num_generations) {
-    // get all platforms (drivers), e.g. NVIDIA
+void main_opencl(std::string infile, std::string outfile, int num_generations, bool measure) {
+    Timing *timing = Timing::getInstance();
+
+    // Query the available OpenCL platforms; the first one is used (Nvidia CUDA on the author's machine)
     std::vector<cl::Platform> all_platforms;
     cl::Platform::get(&all_platforms);
 
@@ -183,28 +185,22 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
         exit(1);
     }
     cl::Platform default_platform=all_platforms[0];
-    std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<"\n";
 
-    // get default device (CPUs, GPUs) of the default platform
+    // Use the first device of that platform (on the author's machine this is the GPU)
     std::vector<cl::Device> all_devices;
     default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
     if(all_devices.size()==0){
         std::cout<<" No devices found. Check OpenCL installation!\n";
         exit(1);
     }
-
-    // use device[1] because that's a GPU; device[0] is the CPU
     cl::Device default_device=all_devices[0];
-    std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<"\n";
 
-    // a context is like a "runtime link" to the device and platform;
-    // i.e. communication is possible
+    // The context links device and platform
     cl::Context context({default_device});
 
-    // create the program that we want to execute on the device
+    // Load kernel code from file into Sources
     cl::Program::Sources sources;
 
-    // load kernel from file
     std::ifstream file("gol.cl"); //taking file as inputstream
     std::string kernel_code;
    
@@ -217,6 +213,7 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
     }
     sources.push_back({kernel_code.c_str(), kernel_code.length()});
 
+    // Create a program with the previously defined context and (kernel) sources
     cl::Program program(context, sources);
     if (program.build({default_device}) != CL_SUCCESS) {
         std::cout << "Error building: " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device) << std::endl;
@@ -237,6 +234,8 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
     int size_x = std::stoi(x_str);
     int size_y = std::stoi(y_str);
 
+    // Two host arrays because one will always hold the previous generation
+    // For now, only `world` is filled with data
     bool *world = new bool[size_x * size_y];
     bool *result = new bool[size_x * size_y];
 
@@ -255,38 +254,49 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
     
     world_file.close();
 
-    // Put size into array
+    // Put the size into an array so it can be passed to the kernel
     int size[2] = {size_x, size_y};
     int n = size_x * size_y;
 
-    // create buffers on device (allocate space on GPU)
+    // Allocate space on the GPU
     cl::Buffer buffer_previous(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
     cl::Buffer buffer_new(context, CL_MEM_READ_WRITE, sizeof(bool) * n);
     cl::Buffer buffer_size(context, CL_MEM_READ_WRITE, sizeof(int) * 2);
 
-    // create a queue (a queue of commands that the GPU will execute)
+    // Create queue of commands that the GPU will execute
     cl::CommandQueue queue(context, default_device);
 
-    // push write commands to queue
+    // Push write commands to queue
     queue.enqueueWriteBuffer(buffer_previous, CL_TRUE, 0, sizeof(bool) * n, world);
     queue.enqueueWriteBuffer(buffer_new, CL_TRUE, 0, sizeof(bool) * n, result);
     queue.enqueueWriteBuffer(buffer_size, CL_TRUE, 0, sizeof(int) * 2, size);
 
-    // RUN ZE KERNEL
+    // Create the kernel object for the `generation` kernel function of our program (built from the kernel code above)
     cl::Kernel gol_kernel(program, "generation");
 
+    timing->stopSetup();
+    timing->startComputation();
+
+    // Actually do the generations
     for (int i = 0; i < num_generations; i++) {
+        // Update the arguments in the kernel
         gol_kernel.setArg(0, buffer_previous);
         gol_kernel.setArg(1, buffer_new);
         gol_kernel.setArg(2, buffer_size);
 
+        // Run it
         queue.enqueueNDRangeKernel(gol_kernel, cl::NullRange, cl::NDRange(n), cl::NullRange);
         queue.finish();
 
+        // Swap the previous buffer with the new buffer, as we will want to use our result from this loop
+        //  as the input of the next loop (overwriting the previous result, which is not needed anymore)
         std::swap(buffer_previous, buffer_new);
     }
     queue.finish();
 
+    timing->stopComputation();
+    timing->startFinalization();
+
     // Since we swap after every generation, we need to proceed differently depending on
     //  whether we're in swapped mode or not at the moment
     if (num_generations % 2 == 0) {
@@ -313,113 +323,17 @@ void main_opencl(std::string infile, std::string outfile, int num_generations) {
     }
 
     result_file.close();
+
+    timing->stopFinalization();
+
+    if (measure) {
+        std::cout << timing->getResults() << std::endl;
+    }
 }
 
-int main(int argc, char* argv[]) {
+void main_classic(std::string infile, std::string outfile, int num_generations, bool measure, Mode mode) {
     Timing *timing = Timing::getInstance();
 
-    // Setup.
-    timing->startSetup();
-
-    // Parse command line arguments
-    std::string infile;
-    std::string outfile;
-    Mode mode = Mode::SEQ;
-    bool use_gpu = false;
-    int num_generations = 0;
-    int num_threads = 1;
-    bool measure = false;
-
-    if (argc < 8) {
-        print_usage();
-        return 1;
-    }
-
-    // Parse arguments
-    for (int i = 1; i < argc; i++) {
-        if (std::string(argv[i]) == "--load") {
-            if (i + 1 < argc) {
-                infile = argv[i+1];
-            } else {
-                print_usage();
-                return 1;
-            }  
-        } else if (std::string(argv[i]) == "--save") {
-            if (i + 1 < argc) {
-                outfile = argv[i+1];
-            } else {
-                print_usage();
-                return 1;
-            } 
-        } else if (std::string(argv[i]) == "--mode") {
-            if (i + 1 < argc) {
-                if (std::string(argv[i+1]) == "seq") {
-                    mode = Mode::SEQ;
-                } else if (std::string(argv[i+1]) == "omp") {
-                    mode = Mode::OMP;
-                } else if (std::string(argv[i+1]) == "ocl") {
-                    mode = Mode::OCL;
-                } else {
-                    print_usage();
-                    return 1;
-                }
-            } else {
-                print_usage();
-                return 1;
-            }
-        } else if (std::string(argv[i]) == "--threads") {
-            if (i + 1 < argc) {
-                num_threads = std::stoi(argv[i+1]);
-            } else {
-                print_usage();
-                return 1;
-            }
-        } else if (std::string(argv[i]) == "--device") {
-            if (i + 1 < argc) {
-                if (std::string(argv[i+1]) == "cpu") {
-                    use_gpu = false;
-                } else if (std::string(argv[i+1]) == "gpu") {
-                    use_gpu = true;
-                } else {
-                    print_usage();
-                    return 1;
-                }
-            } else {
-                print_usage();
-                return 1;
-            }
-        } else if (std::string(argv[i]) == "--generations") {
-            if (i + 1 < argc) {
-                num_generations = std::stoi(argv[i+1]);
-            } else {
-                print_usage();
-                return 1;
-            }  
-        } else if (std::string(argv[i]) == "--measure") {
-            measure = true;
-        }
-    }
-
-    // TODO: Just for testing
-    if (use_gpu) {
-        std::cout << "Using GPU" << std::endl;
-    } else {
-        std::cout << "Using CPU" << std::endl;
-    }
-
-    if (mode == Mode::SEQ) {
-        std::cout << "Running classic sequential version" << std::endl;
-    } else if (mode == Mode::OMP) {
-        std::cout << "Running OpenMP version with " << num_threads << " threads" << std::endl;
-    } else if (mode == Mode::OCL) {
-        std::cout << "Running OpenCL version" << std::endl;
-    }
-
-    if (mode == Mode::OCL) {
-        main_opencl(infile, outfile, num_generations);
-        return 0;
-    }
-
     // Read in the start state
     std::ifstream world_file;
     world_file.open(infile);
@@ -498,6 +412,103 @@ int main(int argc, char* argv[]) {
     if (measure) {
         std::cout << timing->getResults() << std::endl;
     }
+}
+
+int main(int argc, char* argv[]) {
+    Timing *timing = Timing::getInstance();
+
+    // Entry point: start the setup timer, parse the command line, then run the selected mode.
+    timing->startSetup();
+
+    // Option values with their defaults; the parser below overwrites them
+    std::string infile;
+    std::string outfile;
+    Mode mode = Mode::SEQ;
+    bool use_gpu = false;
+    int num_generations = 0;
+    int num_threads = 1;
+    bool measure = false;
+
+    if (argc < 8) {
+        print_usage();
+        return 1;
+    }
+
+    // Scan argv for known flags: a value-taking flag reads its value from argv[i+1]; unrecognized tokens are ignored
+    for (int i = 1; i < argc; i++) {
+        if (std::string(argv[i]) == "--load") {
+            if (i + 1 < argc) {
+                infile = argv[i+1];
+            } else {
+                print_usage();
+                return 1;
+            }  
+        } else if (std::string(argv[i]) == "--save") {
+            if (i + 1 < argc) {
+                outfile = argv[i+1];
+            } else {
+                print_usage();
+                return 1;
+            } 
+        } else if (std::string(argv[i]) == "--mode") {
+            if (i + 1 < argc) {
+                if (std::string(argv[i+1]) == "seq") {
+                    mode = Mode::SEQ;
+                } else if (std::string(argv[i+1]) == "omp") {
+                    mode = Mode::OMP;
+                } else if (std::string(argv[i+1]) == "ocl") {
+                    mode = Mode::OCL;
+                } else {
+                    print_usage();
+                    return 1;
+                }
+            } else {
+                print_usage();
+                return 1;
+            }
+        } else if (std::string(argv[i]) == "--threads") {
+            if (i + 1 < argc) {
+                num_threads = std::stoi(argv[i+1]);
+            } else {
+                print_usage();
+                return 1;
+            }
+        // TODO: --device isn't really needed anymore as we only use the GPU now (use_gpu is never read in main)
+        } else if (std::string(argv[i]) == "--device") {
+            if (i + 1 < argc) {
+                if (std::string(argv[i+1]) == "cpu") {
+                    use_gpu = false;
+                } else if (std::string(argv[i+1]) == "gpu") {
+                    use_gpu = true;
+                } else {
+                    print_usage();
+                    return 1;
+                }
+            } else {
+                print_usage();
+                return 1;
+            }
+        } else if (std::string(argv[i]) == "--generations") {
+            if (i + 1 < argc) {
+                num_generations = std::stoi(argv[i+1]);
+            } else {
+                print_usage();
+                return 1;
+            }  
+        } else if (std::string(argv[i]) == "--measure") {
+            measure = true;
+        }
+    }
+
+    // Run the requested mode. NOTE(review): num_threads and use_gpu are parsed above but passed to neither function - confirm
+    if (mode == Mode::OCL) {
+        main_opencl(infile, outfile, num_generations, measure);
+        return 0;
+    } else {
+        main_classic(infile, outfile, num_generations, measure, mode);
+    }
+
+    
 
     return 0;
 }