From 373134a25574cf8ff63bd5d87cf2472c0c1d07ad Mon Sep 17 00:00:00 2001 From: Brett Date: Sat, 3 Dec 2022 00:39:12 -0500 Subject: [PATCH] Document the OpenCL class --- Step 3/.idea/codeStyles/Project.xml | 73 +++++++++ Step 3/.idea/codeStyles/codeStyleConfig.xml | 2 +- Step 3/.idea/copyright/GPL3.xml | 5 + Step 3/.idea/scopes/GPL_3.xml | 3 + Step 3/.idea/scopes/GPL_3_LOCAL.xml | 3 + Step 3/include/engine/raytracing.h | 72 ++++----- Step 3/include/graphics/graphics.h | 28 ++-- Step 3/include/opencl/cl.h | 167 ++++++++++++++++++-- Step 3/src/engine/main.cpp | 154 +++++++++--------- Step 3/src/engine/raytracing.cpp | 96 +++++------ Step 3/src/engine/world.cpp | 72 ++++----- Step 3/src/opencl/cl.cpp | 137 ++++++++-------- 12 files changed, 515 insertions(+), 297 deletions(-) create mode 100644 Step 3/.idea/codeStyles/Project.xml create mode 100644 Step 3/.idea/copyright/GPL3.xml create mode 100644 Step 3/.idea/scopes/GPL_3.xml create mode 100644 Step 3/.idea/scopes/GPL_3_LOCAL.xml diff --git a/Step 3/.idea/codeStyles/Project.xml b/Step 3/.idea/codeStyles/Project.xml new file mode 100644 index 0000000..a7819ce --- /dev/null +++ b/Step 3/.idea/codeStyles/Project.xml @@ -0,0 +1,73 @@ + + + + \ No newline at end of file diff --git a/Step 3/.idea/codeStyles/codeStyleConfig.xml b/Step 3/.idea/codeStyles/codeStyleConfig.xml index a55e7a1..79ee123 100644 --- a/Step 3/.idea/codeStyles/codeStyleConfig.xml +++ b/Step 3/.idea/codeStyles/codeStyleConfig.xml @@ -1,5 +1,5 @@ - \ No newline at end of file diff --git a/Step 3/.idea/copyright/GPL3.xml b/Step 3/.idea/copyright/GPL3.xml new file mode 100644 index 0000000..b28474b --- /dev/null +++ b/Step 3/.idea/copyright/GPL3.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/Step 3/.idea/scopes/GPL_3.xml b/Step 3/.idea/scopes/GPL_3.xml new file mode 100644 index 0000000..f921103 --- /dev/null +++ b/Step 3/.idea/scopes/GPL_3.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/Step 3/.idea/scopes/GPL_3_LOCAL.xml b/Step 3/.idea/scopes/GPL_3_LOCAL.xml new file mode 100644 index 0000000..0da52bc --- /dev/null +++ b/Step 3/.idea/scopes/GPL_3_LOCAL.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/Step 3/include/engine/raytracing.h b/Step 3/include/engine/raytracing.h index d39ffd8..e18290e 100644 --- a/Step 3/include/engine/raytracing.h +++ b/Step 3/include/engine/raytracing.h @@ -19,13 +19,13 @@ #include namespace Raytracing { - + class Camera { private: /* Image details */ const Image image; const PRECISION_TYPE aspectRatio; - + /* Camera details */ PRECISION_TYPE viewportHeight; PRECISION_TYPE viewportWidth; @@ -35,14 +35,14 @@ namespace Raytracing { const PRECISION_TYPE FAR_PLANE = 500; PRECISION_TYPE tanFovHalf; PRECISION_TYPE frustumLength; - + Vec4 position{0, 0, 0}; Vec4 horizontalAxis; Vec4 verticalAxis; Vec4 imageOrigin; - Vec4 up {0, 1, 0}; - + Vec4 up{0, 1, 0}; + public: Camera(PRECISION_TYPE fov, const Image& image): image(image), aspectRatio(double(image.getWidth()) / double(image.getHeight())) { @@ -58,20 +58,20 @@ namespace Raytracing { verticalAxis = (Vec4{0, viewportHeight, 0, 0}); // lower left of the camera's view port. used to project our vectors from image space to world space imageOrigin = (position - horizontalAxis / 2 - verticalAxis / 2 - Vec4(0, 0, focalLength, 0)); - + tlog << viewportHeight << "\n"; tlog << viewportWidth << "\n"; tlog << "\n"; tlog << horizontalAxis << "\n"; tlog << verticalAxis << "\n"; tlog << imageOrigin << "\n"; - + } - + Ray projectRay(PRECISION_TYPE x, PRECISION_TYPE y); - + void setPosition(const Vec4& pos) { this->position = pos; } - + void setRotation(PRECISION_TYPE yaw, PRECISION_TYPE pitch); // the follow utility functions are actually taking forever to get right @@ -79,7 +79,7 @@ namespace Raytracing { // got to install GLM to test which function works and which does. Maybe they are both bad. or Maybe it's my matrix impl // or maybe the whole rendering stack sucks [[nodiscard]] Mat4x4 project() const { - Mat4x4 project {emptyMatrix}; + Mat4x4 project{emptyMatrix}; // this should be all it takes to create a mostly correct projection matrix project.m00(float(1.0 / (aspectRatio * tanFovHalf))); @@ -106,12 +106,12 @@ namespace Raytracing { view.m01(float(w.y())); view.m02(float(w.z())); view.m03(float(w.w())); - + view.m10(float(u.x())); view.m11(float(u.y())); view.m12(float(u.z())); view.m13(float(u.w())); - + view.m20(float(v.x())); view.m21(float(v.y())); view.m22(float(v.z())); @@ -126,31 +126,31 @@ namespace Raytracing { return view; } Mat4x4 view(PRECISION_TYPE yaw, PRECISION_TYPE pitch); - - [[nodiscard]] inline Vec4 getPosition() const {return position;}; - + + [[nodiscard]] inline Vec4 getPosition() const { return position; }; + // the camera's position must be set with setPosition(Vec4); // uses an internal up vector, assumed to be {0, 1, 0} // will make the camera look at provided position with respects to the current camera position. void lookAt(const Vec4& lookAtPos); }; - + static Random rnd{-1.0, 1.0}; - + struct RaycasterImageBounds { - int width,height, x,y; + int width, height, x, y; }; - - class Raycaster { + + class RayCaster { private: int maxBounceDepth = 50; int raysPerPixel = 50; - + Camera& camera; Image& image; World& world; - std::vector> executors {}; + std::vector> executors{}; // is the raytracer still running? bool stillRunning = true; unsigned int finishedThreads = 0; @@ -159,17 +159,17 @@ namespace Raytracing { // and compared to the actual runtime of the raytracing it's very small! std::mutex queueSync; std::queue* unprocessedQuads = nullptr; - + Vec4 raycasti(const Ray& ray, int depth); Vec4 raycast(const Ray& ray); void runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY); void setupQueue(const std::vector& bounds); public: - inline void updateRayInfo(int maxBounce, int perPixel){ + inline void updateRayInfo(int maxBounce, int perPixel) { raysPerPixel = perPixel; maxBounceDepth = maxBounce; } - inline void resetRayInfo(){ + inline void resetRayInfo() { raysPerPixel = 50; maxBounceDepth = 50; } @@ -189,33 +189,33 @@ namespace Raytracing { // the second creates better results but is 18% slower (better defined shadows) // likely due to not over generating unit vectors biased towards the corners } - Raycaster(Camera& c, Image& i, World& world, const Parser& p): camera(c), image(i), world(world) { + RayCaster(Camera& c, Image& i, World& world, const Parser& p): camera(c), image(i), world(world) { world.generateBVH(); } void runSTDThread(int threads = -1); void runOpenMP(int threads = -1); void runMPI(std::queue bounds); - [[nodiscard]] inline bool areThreadsStillRunning() const {return finishedThreads == executors.size();} - inline void join(){ - for (auto& p : executors) + [[nodiscard]] inline bool areThreadsStillRunning() const { return finishedThreads == executors.size(); } + inline void join() { + for (auto& p: executors) p->join(); } - void deleteThreads(){ - for (auto& p : executors){ + void deleteThreads() { + for (auto& p: executors) { // wait for all threads to exit before trying to delete them. try { if (p->joinable()) p->join(); - } catch (std::exception& e){} + } catch (std::exception& e) {} } // since executors contains the only reference to the unique_ptr it will be deleted automatically executors.clear(); } - ~Raycaster() { + ~RayCaster() { deleteThreads(); - delete(unprocessedQuads); + delete (unprocessedQuads); } }; - + } #endif //STEP_2_RAYTRACING_H diff --git a/Step 3/include/graphics/graphics.h b/Step 3/include/graphics/graphics.h index f1dd447..abcabb4 100644 --- a/Step 3/include/graphics/graphics.h +++ b/Step 3/include/graphics/graphics.h @@ -42,8 +42,8 @@ namespace Raytracing { void drawQuad(); void deleteQuad(); - - #ifdef USE_GLFW + +#ifdef USE_GLFW class XWindow { private: @@ -52,7 +52,7 @@ namespace Raytracing { bool isCloseRequested = false; long lastFrameTime{}; PRECISION_TYPE delta{}; - PRECISION_TYPE frameTimeMs{},frameTimeS{}; + PRECISION_TYPE frameTimeMs{}, frameTimeS{}; PRECISION_TYPE fps{}; public: XWindow(int width, int height); @@ -62,21 +62,21 @@ namespace Raytracing { [[nodiscard]] inline bool shouldWindowClose() const { return isCloseRequested; } - [[nodiscard]] inline PRECISION_TYPE getFrameTimeMillis() const {return frameTimeMs;} - [[nodiscard]] inline PRECISION_TYPE getFrameTimeSeconds() const {return frameTimeS;} - [[nodiscard]] inline PRECISION_TYPE getFPS() const {return fps;} + [[nodiscard]] inline PRECISION_TYPE getFrameTimeMillis() const { return frameTimeMs; } + [[nodiscard]] inline PRECISION_TYPE getFrameTimeSeconds() const { return frameTimeS; } + [[nodiscard]] inline PRECISION_TYPE getFPS() const { return fps; } void setMouseGrabbed(bool grabbed); bool isMouseGrabbed(); - [[nodiscard]] inline int displayWidth() const {return m_displayWidth;} - [[nodiscard]] inline int displayHeight() const {return m_displayHeight;} - [[nodiscard]] inline GLFWwindow* getWindow() const {return window;} + [[nodiscard]] inline int displayWidth() const { return m_displayWidth; } + [[nodiscard]] inline int displayHeight() const { return m_displayHeight; } + [[nodiscard]] inline GLFWwindow* getWindow() const { return window; } void closeWindow(); ~XWindow(); }; - - #else + +#else class XWindow { private: // X11 display itself @@ -125,7 +125,7 @@ namespace Raytracing { void closeWindow(); ~XWindow(); }; - #endif +#endif /** * The display renderer class handles all the major rendering events outside of window functions @@ -138,7 +138,7 @@ namespace Raytracing { World& m_world; Shader& m_imageShader; Shader& m_worldShader; - Raycaster& m_raycaster; + RayCaster& m_raycaster; Parser& m_parser; Camera& m_camera; public: @@ -147,7 +147,7 @@ namespace Raytracing { World& world, Shader& mImageShader, Shader& mWorldShader, - Raycaster& mRaycaster, + RayCaster& mRaycaster, Parser& mParser, Camera& mCamera) : m_window(mWindow), m_mainImage(mMainImage), m_imageShader(mImageShader), m_worldShader(mWorldShader), m_raycaster(mRaycaster), diff --git a/Step 3/include/opencl/cl.h b/Step 3/include/opencl/cl.h index 45986a6..43a6dc6 100644 --- a/Step 3/include/opencl/cl.h +++ b/Step 3/include/opencl/cl.h @@ -9,6 +9,7 @@ // OpenCL includes #include #include +#include #include #ifdef COMPILE_GUI @@ -34,26 +35,146 @@ namespace Raytracing { std::unordered_map buffers; std::unordered_map kernels; + /** + * Checks for some basic errors after calling OpenCL commands. Stuff like GPU out of memory... etc. + */ void checkBasicErrors() const; + public: + /** + * Loads the shader from a file on class creation + * @param file file to load OpenCL "shader" (code) file + */ explicit CLProgram(const std::string& file); + + /** + * Used by the OpenCL class to create a basic OpenCL program + * @param context provided by the OpenCL class. + * @param deviceID provided by the OpenCL class. + */ void loadCLShader(cl_context context, cl_device_id deviceID); + /** + * Kernels are the entry points in OpenCL. You can have multiple of them in a single program. + * @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class. + */ void createKernel(const std::string& kernelName); + + + /** + * Buffers are the quintessential datastructures in OpenCL. They are basically regions of memory allocated to a program. + * @param bufferName the name of the buffer used to store internally + * @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE + * @param bytes the number of bytes to be allocated. + */ void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes); + + /** + * Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called. + * @param bufferName the name of the buffer used to store internally + * @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE + * @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr + * @param ptr the pointer to copy to the GPU. + */ void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr); + + /** + * Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called. + * @param bufferName the name of the buffer used to store internally + * @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE + * @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr + * @param ptr the pointer to copy to the GPU. + */ void createImage(const std::string& imageName, int width, int height); + /** + * Allows you to bind certain buffers to a specific index in the kernel's argument list. + * @param kernel kernel to bind to + * @param buffer buffer to bind to argIndex + * @param argIndex the index of the argument for this buffer. + */ void setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex); + /** + * Runs the kernel code on the GPU. Is blocking. + * @param kernel kernel function name to call + * @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id + * @param localWorkSize how many work items make up a work group to be executed by a kernel. 64 is recommended, must not exceed the printed value "device max workgroup size" + * @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id + */ void runKernel(const std::string& kernel, size_t globalWorkSize, size_t localWorkSize, const size_t* globalWorkOffset = NULL); - void runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1, const size_t* globalWorkOffset = NULL); + + /** + * Runs the kernel code on the GPU. Is blocking. + * This version allows you to specify the number of work dimensions. + * globalWorkSize and localWorkSize must be an array of workDim size which specify the work size for each kernel + * For example a work dim of 2 allows for two separate work sizes to be set per dimension. + * An image is two dimensional and so global work size would be {width of image, height of image} + * and local work size would be {8, 8} for a total of 64 (again recommended). Alternatively specify CL_D2_64_LOCAL_SIZE + * The resulting execution causes get_global_id(0) to run [0, width) times and get_global_id(1) to run [0, height) times + * @param kernel kernel function name to call + * @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id(dim) + * @param localWorkSize how many work items make up a work group to be executed by a kernel. total 64 is recommended, total must not exceed the printed value "device max workgroup size" + * @param workDim number of dimensions to the work group being executed. + * @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id + */ + void runKernel( + const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1, + const size_t* globalWorkOffset = NULL + ); + + /** + * Enqueues a write command to the buffer specified by the buffer name, + * @param buffer the buffer to write to + * @param bytes the number of bytes to be copied + * @param ptr the pointer to copy from. Must have at least bytes available + * @param blocking should this function wait for the bytes to be uploaded to the GPU? + * @param offset offset in the buffer object to write to + */ void writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0); - void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0); - void readImage(const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0); + /** + * Enqueues a read command from the buffered specified by the buffer name. + * Defaults to blocking but can be set to be non-blocking. + * @param buffer buffer to read from + * @param bytes the number of bytes to read. Make sure ptr has at least those bytes available. + * @param ptr the ptr to write the read bytes to. + * @param blocking should we wait for the read or do it async? + * @param offset offset in the buffer to read from. + */ + void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0); + + /** + * Reads an image from the GPU into the memory region specified. Allocated memory region must be large enough to hold the image. + * @param imageName name of the buffer to read from + * @param width width of the image. Must be less than or equal to the width of the image on the GPU + * @param height height of the image. Also must be less than or equal to the height of the image on the GPU + * @param ptr pointer to the memory region to read into + * @param blocking should we wait for the read operation to complete? Defaults to yes. + * @param x x coordinate to start the read from. Defaults to zero since it's unlikely to be needed here. Included for possible future use. + * @param y y coordinate to start the read from. + */ + void readImage( + const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0 + ); + + /** + * Reads an image buffer into a RayCasting Image class. + * Image supplied must have a with and height that matches the width and height of the image buffer specified by the name. + * @param imageName name of the buffer you wish to read from + * @param image reference to an image that you want the GPU data read into. + */ + void readImage(const std::string& imageName, Image& image); + + /** + * Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue. + */ void flushCommands(); + + /** + * Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed. + */ void finishCommands(); ~CLProgram(); @@ -76,17 +197,43 @@ namespace Raytracing { cl_context m_context; + /** + * prints out the important info about the specified device. + * @param device device to data dump + */ void printDeviceInfo(cl_device_id device); - public: - explicit OpenCL(int platformID = 0, int deviceID = 0); - static void init(); - static void createCLProgram(CLProgram& program); - static cl_uint activeDeviceComputeUnits(); - static cl_uint activeDeviceFrequency(); + public: + /** + * creates an opencl instance on the specified platform and device. Defaults to the first GPU device + */ + explicit OpenCL(int platformID = 0, int deviceID = 0); + + /** + * Creates the global OpenCL instance for the engine + */ + static void init(); + + /** + * Creates an OpenCL program object using the global OpenCL connection + * @param program + */ + static void createCLProgram(CLProgram& program); + + /** + * @return the number of compute units the device has + */ + static cl_uint activeDeviceComputeUnits(); + + /** + * the frequency in megahertz of the device + * @return + */ + static cl_uint activeDeviceFrequency(); + ~OpenCL(); }; - + } #endif //STEP_3_CL_H diff --git a/Step 3/src/engine/main.cpp b/Step 3/src/engine/main.cpp index 8ddd394..31ab184 100644 --- a/Step 3/src/engine/main.cpp +++ b/Step 3/src/engine/main.cpp @@ -9,24 +9,24 @@ #include #ifdef USE_MPI - - #include + +#include #endif //#include //#include #ifdef COMPILE_GUI - - #include - #include - #include + +#include +#include +#include #endif #ifdef COMPILE_OPENCL - - #include + +#include #endif @@ -81,10 +81,10 @@ int main(int argc, char** args) { "\tSets the directory where the resources are stored.\n" "\tThis can be relative.Must have trailing '/' \n", "../resources/"); parser.addOption("--mpi", "Use OpenMPI\n" - "\tTells the raycaster to use OpenMPI to run the raycaster algorithm\n"); + "\tTells the raycaster to use OpenMPI to run the raycaster algorithm\n"); parser.addOption("--openmp", "Use OpenMP\n" - "\tTells the raycaster to use OpenMP to run the raycaster algorithm\n"); - + "\tTells the raycaster to use OpenMP to run the raycaster algorithm\n"); + // disabled because don't currently have a way to parse vectors. TODO //parser.addOption("--position", "Camera Position\n\tSets the position used to render the scene with the camera.\n", "{0, 0, 0}"); @@ -111,22 +111,22 @@ int main(int argc, char** args) { tlog << "Parsing complete! Starting raytracer with options:" << std::endl; // not perfect (contains duplicates) but good enough. parser.printAllInInfo(); - - #ifdef USE_MPI - Raytracing::MPI::init(argc, args); - #endif - - #ifdef COMPILE_GUI - XWindow* window; - if (parser.hasOption("--gui") || parser.hasOption("-g")) - window = new XWindow(1440, 720); - Shader worldShader("../resources/shaders/world.vs", "../resources/shaders/world.fs"); - #endif - - #ifdef COMPILE_OPENCL + +#ifdef USE_MPI + Raytracing::MPI::init(argc, args); +#endif + +#ifdef COMPILE_GUI + XWindow* window; + if (parser.hasOption("--gui") || parser.hasOption("-g")) + window = new XWindow(1440, 720); + Shader worldShader("../resources/shaders/world.vs", "../resources/shaders/world.fs"); +#endif + +#ifdef COMPILE_OPENCL OpenCL::init(); - - #endif + +#endif Raytracing::Image image(1440, 720); //Raytracing::Image image(std::stoi(parser.getOptionValue("-w")), std::stoi(parser.getOptionValue("-h"))); @@ -135,13 +135,13 @@ int main(int argc, char** args) { //camera.setPosition({0, 0, 1}); camera.setPosition({6, 5, 6}); camera.lookAt({0, 0, 0}); - - - #ifdef COMPILE_GUI + + +#ifdef COMPILE_GUI WorldConfig worldConfig{worldShader}; - #else +#else WorldConfig worldConfig; - #endif +#endif worldConfig.useBVH = true; Raytracing::World world{worldConfig}; @@ -175,10 +175,10 @@ int main(int argc, char** args) { //world.add(new Raytracing::ModelObject({0, 0, 0}, debugCube, world.getMaterial("cat"))); if (parser.hasOption("--gui") || parser.hasOption("-g")) { - #ifdef COMPILE_GUI - Raytracing::Raycaster raycaster{camera, image, world, parser}; +#ifdef COMPILE_GUI + Raytracing::RayCaster rayCaster{camera, image, world, parser}; Texture mainImage(&image); - + CLProgram program(parser.getOptionValue("--resources") + "opencl/image.cl"); OpenCL::createCLProgram(program); program.createKernel("drawImage"); @@ -186,52 +186,42 @@ int main(int argc, char** args) { program.setKernelArgument("drawImage", "mainImage", 0); program.setKernelArgument("drawImage", "mainImage", 1); - size_t works[2] {(size_t) image.getWidth(), (size_t) image.getHeight()}; - size_t localWorks[2] {8, 8}; + size_t works[2]{(size_t) image.getWidth(), (size_t) image.getHeight()}; + size_t localWorks[2]{8, 8}; - unsigned char bytes[image.getWidth() * image.getHeight() * 4]; - for (int i = 0; i < image.getWidth() * image.getHeight() * 4; i++) - bytes[i] = 0; Shader shader("../resources/shaders/basic.vs", "../resources/shaders/basic.fs"); - Raytracing::DisplayRenderer renderer{*window, mainImage, world, shader, worldShader, raycaster, parser, camera}; + Raytracing::DisplayRenderer renderer{*window, mainImage, world, shader, worldShader, rayCaster, parser, camera}; while (!window->shouldWindowClose()) { window->beginUpdate(); renderer.draw(); program.runKernel("drawImage", works, localWorks, 2); - program.readImage("mainImage", image.getWidth(), image.getHeight(), bytes); - const PRECISION_TYPE colorFactor = 1.0 / 255.0; - for (int i = 0; i < image.getWidth(); i++){ - for (int j = 0; j < image.getHeight(); j++){ - const auto pixelData = bytes + (j * 4 * image.getWidth() + i * 4); - //tlog << (int)pixelData[0] << " " << (int)pixelData[1] << " " << (int)pixelData[2] << "\n"; - image.setPixelColor(i, j, {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor}); - } - } + program.readImage("mainImage", image); + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); world.drawBVH(worldShader); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); window->endUpdate(); } RTSignal->haltExecution = true; - raycaster.join(); - #else + rayCaster.join(); +#else flog << "Program not compiled with GUI support! Unable to open GUI\n"; - #endif +#endif } else { - Raytracing::Raycaster rayCaster{camera, image, world, parser}; + Raytracing::RayCaster rayCaster{camera, image, world, parser}; ilog << "Running RayCaster (NO_GUI)!\n"; // we don't actually have to check for --single since it's implied to be default true. int threads = std::stoi(parser.getOptionValue("--threads")); if (parser.hasOption("--mpi")) { // We need to make sure that if the user requests that MPI be run while not having MPI compiled, they get a helpful error warning. - #ifdef USE_MPI +#ifdef USE_MPI rayCaster.runMPI(Raytracing::MPI::getCurrentImageRegionAssociation(rayCaster)); - #else +#else flog << "Unable to run with MPI, CMake not set to compile MPI!\n"; return 33; - #endif - } else if(parser.hasOption("--openmp")){ +#endif + } else if (parser.hasOption("--openmp")) { rayCaster.runOpenMP(threads); } else { rayCaster.runSTDThread(threads); @@ -258,39 +248,39 @@ int main(int argc, char** args) { image.fromArray(buffer, doubleSize, i); } #endif - // write the image to the file - Raytracing::ImageOutput imageOutput(image); - - auto t = std::time(nullptr); - auto now = std::localtime(&t); - std::stringstream timeString; - timeString << (1900 + now->tm_year); - timeString << "-"; - timeString << (1 + now->tm_mon); - timeString << "-"; - timeString << now->tm_mday; - timeString << " "; - timeString << now->tm_hour; - timeString << ":"; - timeString << now->tm_min; - timeString << ":"; - timeString << now->tm_sec; - ilog << "Writing Image!\n"; - imageOutput.write(parser.getOptionValue("--output") + timeString.str(), parser.getOptionValue("--format")); + // write the image to the file + Raytracing::ImageOutput imageOutput(image); + + auto t = std::time(nullptr); + auto now = std::localtime(&t); + std::stringstream timeString; + timeString << (1900 + now->tm_year); + timeString << "-"; + timeString << (1 + now->tm_mon); + timeString << "-"; + timeString << now->tm_mday; + timeString << " "; + timeString << now->tm_hour; + timeString << ":"; + timeString << now->tm_min; + timeString << ":"; + timeString << now->tm_sec; + ilog << "Writing Image!\n"; + imageOutput.write(parser.getOptionValue("--output") + timeString.str(), parser.getOptionValue("--format")); #ifdef USE_MPI } // wait for all processes to finish sending and receiving before we exit all of them. MPI_Barrier(MPI_COMM_WORLD); #endif - + delete (RTSignal); - #ifdef COMPILE_GUI +#ifdef COMPILE_GUI deleteQuad(); - #endif - #ifdef USE_MPI +#endif +#ifdef USE_MPI MPI_Finalize(); - #endif - +#endif + tlog << "Goodbye!\n"; return 0; } diff --git a/Step 3/src/engine/raytracing.cpp b/Step 3/src/engine/raytracing.cpp index 7cb8728..cca1eb6 100644 --- a/Step 3/src/engine/raytracing.cpp +++ b/Step 3/src/engine/raytracing.cpp @@ -44,21 +44,21 @@ namespace Raytracing { void Camera::setRotation(const PRECISION_TYPE yaw, const PRECISION_TYPE pitch) { // TODO: } - Mat4x4 Camera::view(PRECISION_TYPE yaw, PRECISION_TYPE pitch) { + Mat4x4 Camera::view(PRECISION_TYPE yaw, PRECISION_TYPE pitch) { Mat4x4 view; - + pitch = degreeeToRadian(pitch); yaw = degreeeToRadian(yaw); - + PRECISION_TYPE cosPitch = std::cos(pitch); PRECISION_TYPE cosYaw = std::cos(yaw); PRECISION_TYPE sinPitch = std::sin(pitch); PRECISION_TYPE sinYaw = std::sin(yaw); - + auto x = Vec4{cosYaw, 0, -sinYaw}; // forward auto y = Vec4{sinYaw * sinPitch, cosPitch, cosYaw * sinPitch}; // right auto z = Vec4{sinYaw * cosPitch, -sinPitch, cosPitch * cosYaw}; // up - + // we can actually take those x, y, z vectors and use them to compute the raytracer camera settings viewportHeight = 2 * tanFovHalf; viewportWidth = aspectRatio * viewportHeight; @@ -71,23 +71,23 @@ namespace Raytracing { view.m01(float(x.y())); view.m02(float(x.z())); view.m03(float(x.w())); - + view.m10(float(y.x())); view.m11(float(y.y())); view.m12(float(y.z())); view.m13(float(y.w())); - + view.m20(float(z.x())); view.m21(float(z.y())); view.m22(float(z.z())); view.m23(float(z.w())); - + // view matrix are inverted, dot product to simulate translate matrix multiplication view.m03(-float(Vec4::dot(x, position))); view.m13(-float(Vec4::dot(y, position))); view.m23(-float(Vec4::dot(z, position))); view.m33(1); - + return view; } @@ -97,14 +97,14 @@ namespace Raytracing { Vec4 color; }; - Vec4 Raycaster::raycasti(const Ray& ray, int depth){ + Vec4 RayCaster::raycasti(const Ray& ray, int depth) { return {}; } - Vec4 Raycaster::raycast(const Ray& ray) { + Vec4 RayCaster::raycast(const Ray& ray) { Ray localRay = ray; - Vec4 color {1.0, 1.0, 1.0}; - for (int CURRENT_BOUNCE = 0; CURRENT_BOUNCE < maxBounceDepth; CURRENT_BOUNCE++){ + Vec4 color{1.0, 1.0, 1.0}; + for (int CURRENT_BOUNCE = 0; CURRENT_BOUNCE < maxBounceDepth; CURRENT_BOUNCE++) { if (RTSignal->haltExecution || RTSignal->haltRaytracing) return color; while (RTSignal->pauseRaytracing) // sleep for 1/60th of a second, or about 1 frame. @@ -138,8 +138,8 @@ namespace Raytracing { return color; } - - void Raycaster::runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY) { + + void RayCaster::runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY) { try { int x = imageBounds.x + loopX; int y = imageBounds.y + loopY; @@ -160,9 +160,9 @@ namespace Raytracing { flog << error.what() << "\n"; } } - - - void Raycaster::runSTDThread(int threads){ + + + void RayCaster::runSTDThread(int threads) { setupQueue(partitionScreen(threads)); ilog << "Running std::thread\n"; for (int i = 0; i < threads; i++) { @@ -170,7 +170,7 @@ namespace Raytracing { // run through all the quadrants std::stringstream str; str << "Threading of #"; - str << (i+1); + str << (i + 1); profiler::start("Raytracer Results", str.str()); while (unprocessedQuads != nullptr) { RaycasterImageBounds imageBoundingData{}; @@ -195,12 +195,12 @@ namespace Raytracing { })); } } - - void Raycaster::runOpenMP(int threads){ + + void RayCaster::runOpenMP(int threads) { setupQueue(partitionScreen(threads)); - #ifdef USE_OPENMP +#ifdef USE_OPENMP ilog << "Running OpenMP\n"; - #pragma omp parallel num_threads(threads+1) default(none) shared(threads) + #pragma omp parallel num_threads(threads+1) default(none) shared(threads) { int threadID = omp_get_thread_num(); // an attempt at making the omp command non-blocking. @@ -214,7 +214,7 @@ namespace Raytracing { bool running = true; while (running) { RaycasterImageBounds imageBoundingData{}; - #pragma omp critical +#pragma omp critical { if (unprocessedQuads->empty()) running = false; @@ -231,18 +231,18 @@ namespace Raytracing { } } } - #pragma omp critical +#pragma omp critical finishedThreads++; profiler::end("Raytracer Results", str.str()); } } tlog << "OpenMP finished!\n"; - #else - flog << "Not compiled with OpenMP! Unable to run raytracing.\n"; - system_threads; - #endif +#else + flog << "Not compiled with OpenMP! Unable to run raytracing.\n"; + system_threads; +#endif } - void Raycaster::runMPI(std::queue bounds){ + void RayCaster::runMPI(std::queue bounds) { ilog << "Running MPI\n"; dlog << "We have " << bounds.size() << " bounds currently pending!\n"; while (!bounds.empty()) { @@ -254,17 +254,17 @@ namespace Raytracing { } bounds.pop(); } - #ifdef USE_MPI +#ifdef USE_MPI dlog << "Finished running MPI on " << currentProcessID << "\n"; - #endif +#endif } - - std::vector Raycaster::partitionScreen(int threads) { + + std::vector RayCaster::partitionScreen(int threads) { // if we are running single threaded, disable everything special // the reason we run single threaded in a seperate thread is because the GUI requires its own set of updating commands // which cannot be blocked by the raytracer, otherwise it would become unresponsive. int divs = 1; - if (threads < 0 || threads == 1){ + if (threads < 0 || threads == 1) { threads = 1; divs = 1; } else { @@ -275,30 +275,30 @@ namespace Raytracing { // to do it without a queue like this leads to most threads finishing and a single thread being the critical path which isn't optimally efficient. divs = int(std::log(threads) / std::log(2)) * 4; } - + ilog << "Generating multithreaded raytracer with " << threads << " threads and " << divs << " divisions! \n"; - + std::vector bounds; - + // we need to subdivide the image for the threads, since this is really quick it's fine to due sequentially for (int dx = 0; dx < divs; dx++) { for (int dy = 0; dy < divs; dy++) { bounds.push_back({ - image.getWidth() / divs, - image.getHeight() / divs, - (image.getWidth() / divs) * dx, - (image.getHeight() / divs) * dy - }); + image.getWidth() / divs, + image.getHeight() / divs, + (image.getWidth() / divs) * dx, + (image.getHeight() / divs) * dy + }); } } return bounds; } - - void Raycaster::setupQueue(const std::vector& bounds) { - delete(unprocessedQuads); + + void RayCaster::setupQueue(const std::vector& bounds) { + delete (unprocessedQuads); unprocessedQuads = new std::queue(); - for (auto& b : bounds) + for (auto& b: bounds) unprocessedQuads->push(b); } - + } diff --git a/Step 3/src/engine/world.cpp b/Step 3/src/engine/world.cpp index 23f738e..bc86edd 100644 --- a/Step 3/src/engine/world.cpp +++ b/Step 3/src/engine/world.cpp @@ -7,7 +7,7 @@ #include "engine/image/stb_image.h" namespace Raytracing { - + World::~World() { for (auto* p: objects) delete (p); @@ -15,7 +15,7 @@ namespace Raytracing { delete (p.second); //delete(bvhObjects); } - + HitData SphereObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const { PRECISION_TYPE radiusSquared = radius * radius; // move the ray to be with respects to the sphere @@ -28,11 +28,11 @@ namespace Raytracing { // = 0: the ray has one root, we hit the edge of the sphere // < 0: ray isn't inside the sphere. PRECISION_TYPE discriminant = b * b - (a * c); - + // < 0: ray isn't inside the sphere. Don't need to bother calculating the roots. if (discriminant < 0) return {false, Vec4(), Vec4(), 0}; - + // now we have to find the root which exists inside our range [min,max] auto root = (-b - std::sqrt(discriminant)) / a; // if the first root isn't in our range @@ -48,7 +48,7 @@ namespace Raytracing { auto RayAtRoot = ray.along(root); // The normal of a sphere is just the point of the hit minus the center position auto normal = (RayAtRoot - position) / radius; - + /*if (Raytracing::vec4::dot(ray.getDirection(), normal) > 0.0) { tlog << "ray inside sphere\n"; } else @@ -60,16 +60,16 @@ namespace Raytracing { // have to invert the v since we have to invert the v again later due to triangles return {true, RayAtRoot, normal, root, u, 1.0 - v}; } - + std::pair World::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const { // actually speeds up rendering by about 110,000ms (total across 16 threads) - if (bvhObjects != nullptr && m_config.useBVH){ + if (bvhObjects != nullptr && m_config.useBVH) { auto hResult = HitData{false, Vec4(), Vec4(), max}; Object* objPtr = nullptr; auto intersected = bvhObjects->rayAnyHitIntersect(ray, min, max); - for (const auto& ptr : intersected) { + for (const auto& ptr: intersected) { auto cResult = ptr.ptr->checkIfHit(ray, min, hResult.length); if (cResult.hit) { hResult = cResult; @@ -106,21 +106,21 @@ namespace Raytracing { return {hResult, objPtr}; } } - + void World::generateBVH() { bvhObjects = std::make_unique(objects); } - + ScatterResults DiffuseMaterial::scatter(const Ray& ray, const HitData& hitData) const { - Vec4 newRay = hitData.normal + Raytracing::Raycaster::randomUnitVector().normalize(); - + Vec4 newRay = hitData.normal + Raytracing::RayCaster::randomUnitVector().normalize(); + // rays that are close to zero are liable to floating point precision errors if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON) newRay = hitData.normal; - + return {true, Ray{hitData.hitPoint, newRay}, getBaseColor()}; } - + ScatterResults MetalMaterial::scatter(const Ray& ray, const HitData& hitData) const { // create a ray reflection Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal); @@ -128,22 +128,22 @@ namespace Raytracing { bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0; return {shouldReflect, Ray{hitData.hitPoint, newRay}, getBaseColor()}; } - + ScatterResults BrushedMetalMaterial::scatter(const Ray& ray, const HitData& hitData) const { // create a ray reflection Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal); // make sure our reflected ray is outside the sphere and doesn't point inwards bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0; - return {shouldReflect, Ray{hitData.hitPoint, newRay + Raycaster::randomUnitVector() * fuzzyness}, getBaseColor()}; + return {shouldReflect, Ray{hitData.hitPoint, newRay + RayCaster::randomUnitVector() * fuzzyness}, getBaseColor()}; } ScatterResults TexturedMaterial::scatter(const Ray& ray, const HitData& hitData) const { - Vec4 newRay = hitData.normal + Raytracing::Raycaster::randomUnitVector().normalize(); - + Vec4 newRay = hitData.normal + Raytracing::RayCaster::randomUnitVector().normalize(); + // rays that are close to zero are liable to floating point precision errors if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON) newRay = hitData.normal; - + return {true, Ray{hitData.hitPoint, newRay}, getColor(hitData.u, hitData.v)}; } Vec4 TexturedMaterial::getColor(PRECISION_TYPE u, PRECISION_TYPE v) const { @@ -155,11 +155,11 @@ namespace Raytracing { // fix that pesky issue of the v being rotated 90* compared to the image v = 1.0 - clamp(v, 0.0, 1.0); - auto imageX = (int)(width * u); - auto imageY = (int)(height * v); - - if (imageX >= width) imageX = width-1; - if (imageY >= height) imageY = height-1; + auto imageX = (int) (width * u); + auto imageY = (int) (height * v); + + if (imageX >= width) imageX = width - 1; + if (imageY >= height) imageY = height - 1; // since stbi loads in RGB8 [0, 255] but the engine works on [0, 1] we need to scale the data down. // this is best done with a single division followed by multiple multiplication. @@ -169,7 +169,7 @@ namespace Raytracing { return {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor}; } - TexturedMaterial::TexturedMaterial(const std::string& file) : Material({}) { + TexturedMaterial::TexturedMaterial(const std::string& file): Material({}) { // we are going to have to ignore transparency for now. TODO:? data = stbi_load(file.c_str(), &width, &height, &channels, 0); if (!data) @@ -188,7 +188,7 @@ namespace Raytracing { return baseColor; } - PRECISION_TYPE sign(PRECISION_TYPE i){ + PRECISION_TYPE sign(PRECISION_TYPE i) { return i >= 0 ? 1 : -1; } @@ -200,25 +200,25 @@ namespace Raytracing { PRECISION_TYPE a, f, u, v; edge1 = (theTriangle.vertex2 + position) - (theTriangle.vertex1 + position); edge2 = (theTriangle.vertex3 + position) - (theTriangle.vertex1 + position); - + h = Vec4::cross(ray.getDirection(), edge2); a = Vec4::dot(edge1, h); - + if (a > -EPSILON && a < EPSILON) return {false, Vec4(), Vec4(), 0}; //parallel to triangle - + f = 1.0 / a; s = ray.getStartingPoint() - (theTriangle.vertex1 + position); u = f * Vec4::dot(s, h); - + if (u < 0.0 || u > 1.0) return {false, Vec4(), Vec4(), 0}; - + q = Vec4::cross(s, edge1); v = f * Vec4::dot(ray.getDirection(), q); if (v < 0.0 || u + v > 1.0) return {false, Vec4(), Vec4(), 0}; - + // At this stage we can compute t to find out where the intersection point is on the line. PRECISION_TYPE t = f * Vec4::dot(edge2, q); // keep t in reasonable bounds, ensuring we respect depth @@ -226,7 +226,7 @@ namespace Raytracing { // ray intersects Vec4 rayIntersectionPoint = ray.along(t); Vec4 normal; - + // calculate triangle berry centric coords // first we need the vector that runs between the vertex and the intersection point for all three vertices // we must subtract the position of the triangle from the intersection point because this calc must happen in triangle space not world space. @@ -234,7 +234,7 @@ namespace Raytracing { auto vertex1ToIntersect = theTriangle.vertex1 - (rayIntersectionPoint - position); auto vertex2ToIntersect = theTriangle.vertex2 - (rayIntersectionPoint - position); auto vertex3ToIntersect = theTriangle.vertex3 - (rayIntersectionPoint - position); - + // the magnitude of the cross product of two vectors is double the area formed by the triangle of their intersection. auto fullAreaVec = Vec4::cross(theTriangle.vertex1 - theTriangle.vertex2, theTriangle.vertex1 - theTriangle.vertex3); auto areaVert1Vec = Vec4::cross(vertex2ToIntersect, vertex3ToIntersect); @@ -271,7 +271,7 @@ namespace Raytracing { return {false, Vec4(), Vec4(), 0}; } - + HitData ModelObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const { auto hResult = HitData{false, Vec4(), Vec4(), max}; @@ -280,7 +280,7 @@ namespace Raytracing { // must check through all the triangles in the object // respecting depth along the way // but reducing the max it can reach my the last longest vector length. - for (const auto& t : triangles) { + for (const auto& t: triangles) { auto cResult = checkIfTriangleGotHit(*t, position, ray, min, hResult.length); if (cResult.hit) hResult = cResult; diff --git a/Step 3/src/opencl/cl.cpp b/Step 3/src/opencl/cl.cpp index 85353c7..8938536 100644 --- a/Step 3/src/opencl/cl.cpp +++ b/Step 3/src/opencl/cl.cpp @@ -7,14 +7,17 @@ #include #ifdef COMPILE_GUI - #define GLFW_EXPOSE_NATIVE_X11 - #define GLFW_EXPOSE_NATIVE_GLX - #include - #include +#define GLFW_EXPOSE_NATIVE_X11 +#define GLFW_EXPOSE_NATIVE_GLX + +#include +#include + #endif #include #include +#include "engine/math/vectors.h" namespace Raytracing { @@ -23,21 +26,23 @@ namespace Raytracing { void OpenCL::init() { openCl = std::make_shared(0, 0); } - OpenCL::OpenCL(int platformID, int deviceID): m_activePlatform(platformID) { + + OpenCL::OpenCL(int platformID, int deviceID): + m_activePlatform(platformID) { m_CL_ERR = CL_SUCCESS; m_numPlatforms = 0; - m_CL_ERR = clGetPlatformIDs(0, NULL, &m_numPlatforms ); - + m_CL_ERR = clGetPlatformIDs(0, NULL, &m_numPlatforms); + if (m_CL_ERR == CL_SUCCESS) dlog << "We found " << m_numPlatforms << " OpenCL Platforms.\n"; else elog << "OpenCL Error! " << m_CL_ERR << "\n"; - + m_platformIDs = new cl_platform_id[m_numPlatforms]; m_CL_ERR = clGetPlatformIDs(m_numPlatforms, m_platformIDs, &m_numOfPlatformIDs); m_CL_ERR = clGetDeviceIDs(m_platformIDs[platformID], CL_DEVICE_TYPE_GPU, 1, &m_deviceID, &m_numOfDevices); - + printDeviceInfo(m_deviceID); cl_context_properties proper[3] = { @@ -46,7 +51,7 @@ namespace Raytracing { }; m_context = clCreateContext(proper, 1, &m_deviceID, NULL, NULL, &m_CL_ERR); - + if (m_CL_ERR != CL_SUCCESS) elog << "OpenCL Error Creating Context! " << m_CL_ERR << "\n"; @@ -78,27 +83,31 @@ namespace Raytracing { clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(dv), &dv, NULL); dlog << "Opening OpenCL Device!\n"; - dlog << "Device CL String " << dv.opencl_space << dv.major << dv.dot << dv.minor << dv.space << dv.vendor<< "\n"; + dlog << "Device CL String " << dv.opencl_space << dv.major << dv.dot << dv.minor << dv.space << dv.vendor << "\n"; dlog << "Device Address Bits: " << deviceAddressBits << "\n"; dlog << "Device is currently " << (deviceAvailable ? "available" : "unavailable") << "\n"; - dlog << "Device has " << cacheSize/1024 << "kb of cache with a cache line width of " << cacheLineSize << " bytes\n"; + dlog << "Device has " << cacheSize / 1024 << "kb of cache with a cache line width of " << cacheLineSize << " bytes\n"; dlog << "Device " << (textureSupport ? "has" : "doesn't have") << " texture support\n"; dlog << "Device has " << maxWorkgroups << " max workgroup size.\n"; dlog << "Device has " << m_computeUnits << " compute units running at a max clock frequency " << m_deviceClockFreq << "\n"; if (!textureSupport) elog << "Warning! The OpenCL device lacks texture support!\n"; } + void OpenCL::createCLProgram(CLProgram& program) { program.loadCLShader(openCl->m_context, openCl->m_deviceID); } + OpenCL::~OpenCL() { delete[](m_platformIDs); clReleaseDevice(m_deviceID); clReleaseContext(m_context); } + cl_uint OpenCL::activeDeviceComputeUnits() { return openCl->m_computeUnits; } + cl_uint OpenCL::activeDeviceFrequency() { return openCl->m_deviceClockFreq; } @@ -107,6 +116,7 @@ namespace Raytracing { CLProgram::CLProgram(const std::string& file) { m_source = ShaderLoader::loadShaderFile(file); } + void CLProgram::loadCLShader(cl_context context, cl_device_id deviceID) { this->m_context = context; this->m_deviceID = deviceID; @@ -118,49 +128,41 @@ namespace Raytracing { if (m_CL_ERR != CL_SUCCESS) elog << "Unable to create CL program!\n"; - + m_CL_ERR = clBuildProgram(m_program, 1, &deviceID, NULL, NULL, NULL); if (m_CL_ERR != CL_SUCCESS) { elog << "Unable to build CL program!\n"; size_t len; - + clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); char buffer[len]; - + clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, len, buffer, NULL); elog << buffer << "\n"; } } - /** - * Buffers are the quintessential datastructures in OpenCL. They are basically regions of memory allocated to a program. - * @param bufferName the name of the buffer used to store internally - * @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE - * @param bytes the number of bytes to be allocated. - */ + + void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes) { // create the buffer on the GPU cl_mem buff = clCreateBuffer(m_context, flags, bytes, NULL, &m_CL_ERR); // then store it in our buffer map for easy access. buffers.insert({bufferName, buff}); } - /** - * Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called. - * @param bufferName the name of the buffer used to store internally - * @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE - * @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr - * @param ptr the pointer to copy to the GPU. - */ + + void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr) { // create the buffer on the GPU cl_mem buff = clCreateBuffer(m_context, CL_MEM_COPY_HOST_PTR | flags, bytes, ptr, &m_CL_ERR); // then store it in our buffer map for easy access. buffers.insert({bufferName, buff}); } + void CLProgram::createImage(const std::string& imageName, int width, int height) { // create the texture on the GPU - cl_image_format format {CL_RGBA, CL_UNORM_INT8}; + cl_image_format format{CL_RGBA, CL_UNORM_INT8}; cl_image_desc imageDesc; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_width = width; @@ -171,10 +173,10 @@ namespace Raytracing { imageDesc.num_samples = 0; imageDesc.buffer = NULL; cl_mem tex = clCreateImage(m_context, CL_MEM_READ_WRITE, &format, &imageDesc, NULL, &m_CL_ERR); - if (m_CL_ERR != CL_SUCCESS){ + if (m_CL_ERR != CL_SUCCESS) { elog << "Unable to create image texture!\n"; checkBasicErrors(); - switch (m_CL_ERR){ + switch (m_CL_ERR) { case CL_INVALID_VALUE: elog << "\tFlags are not valid!\n"; // this is straight from the docs @@ -208,63 +210,37 @@ namespace Raytracing { // then store it in our buffer map for easy access. buffers.insert({imageName, tex}); } - /** - * Kernels are the entry points in OpenCL. You can have multiple of them in a single program. - * @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class. - */ + + void CLProgram::createKernel(const std::string& kernelName) { auto kernel = clCreateKernel(m_program, kernelName.c_str(), &m_CL_ERR); if (m_CL_ERR != CL_SUCCESS) elog << "Unable to create CL kernel " << kernelName << "!\n"; kernels.insert({kernelName, kernel}); } - /** - * Allows you to bind certain buffers to a specific index in the kernel's argument list. - * @param kernel kernel to bind to - * @param buffer buffer to bind to argIndex - * @param argIndex the index of the argument for this buffer. - */ + void CLProgram::setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex) { m_CL_ERR = clSetKernelArg(kernels[kernel], argIndex, sizeof(cl_mem), (void*) &buffers[buffer]); if (m_CL_ERR != CL_SUCCESS) elog << "Unable to bind argument " << buffer << " to CL kernel " << kernel << "!\n"; } - /** - * Enqueues a write command to the buffer specified by the buffer name, - * @param buffer the buffer to write to - * @param bytes the number of bytes to be copied - * @param ptr the pointer to copy from. Must have at least bytes available - * @param blocking should this function wait for the bytes to be uploaded to the GPU? - * @param offset offset in the buffer object to write to - */ + void CLProgram::writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) { m_CL_ERR = clEnqueueWriteBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL); if (m_CL_ERR != CL_SUCCESS) elog << "Unable to enqueue write to " << buffer << " buffer!\n"; } - /** - * Enqueues a read command from the buffered specified by the buffer name. - * Defaults to blocking but can be set to be non-blocking. - * @param buffer buffer to read from - * @param bytes the number of bytes to read. Make sure ptr has at least those bytes available. - * @param ptr the ptr to write the read bytes to. - * @param blocking should we wait for the read or do it async? - * @param offset offset in the buffer to read from. - */ + void CLProgram::readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) { m_CL_ERR = clEnqueueReadBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL); if (m_CL_ERR != CL_SUCCESS) elog << "Unable to enqueue read from " << buffer << " buffer!\n"; } - /** - * Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue. - */ + void CLProgram::flushCommands() { clFlush(m_commandQueue); } - /** - * Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed. - */ + void CLProgram::finishCommands() { flushCommands(); clFinish(m_commandQueue); @@ -274,13 +250,14 @@ namespace Raytracing { m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], 1, globalWorkOffset, &globalWorkSize, &localWorkSize, 0, NULL, NULL); } - void CLProgram::runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim, const size_t* globalWorkOffset) { + void + CLProgram::runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim, const size_t* globalWorkOffset) { m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, NULL, NULL); } void CLProgram::readImage(const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking, size_t x, size_t y) { - size_t origin[3] {x, y, 0}; - size_t region[3] {width, height, 1}; + size_t origin[3]{x, y, 0}; + size_t region[3]{width, height, 1}; m_CL_ERR = clEnqueueReadImage(m_commandQueue, buffers[imageName], blocking, origin, region, 0, 0, ptr, 0, NULL, NULL); if (m_CL_ERR != CL_SUCCESS) { elog << "Unable to enqueue read from " << imageName << " image:\n"; @@ -298,15 +275,16 @@ namespace Raytracing { CLProgram::~CLProgram() { finishCommands(); - for (const auto& kernel : kernels) + for (const auto& kernel: kernels) clReleaseKernel(kernel.second); clReleaseProgram(m_program); - for (const auto& buffer : buffers) + for (const auto& buffer: buffers) clReleaseMemObject(buffer.second); clReleaseCommandQueue(m_commandQueue); } + void CLProgram::checkBasicErrors() const { - switch (m_CL_ERR){ + switch (m_CL_ERR) { case CL_OUT_OF_HOST_MEMORY: elog << "\tHost is out of memory!\n"; break; @@ -331,4 +309,23 @@ namespace Raytracing { } } + void CLProgram::readImage(const std::string& imageName, Image& image) { + // Create an array for copying bytes from the GPU to the CPU with + unsigned char bytes[image.getWidth() * image.getHeight() * 4]; + for (int i = 0; i < image.getWidth() * image.getHeight() * 4; i++) + bytes[i] = 0; + // Call to OpenCL to read the image from the GPU and send it to the location of our buffer + this->readImage(imageName, image.getWidth(), image.getHeight(), bytes); + // multiplication is much faster than division, + // so to speed up the function precalculate the color multiplier used to convert from RGBA8 format to floating point format used by the raytracer. + const PRECISION_TYPE colorFactor = 1.0 / 255.0; + // copy the data from the buffer into the image by calculating its byte offset based on its pixel position. + for (int i = 0; i < image.getWidth(); i++) { + for (int j = 0; j < image.getHeight(); j++) { + const auto pixelData = bytes + (j * 4 * image.getWidth() + i * 4); + image.setPixelColor(i, j, {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor}); + } + } + } + } \ No newline at end of file