Document the OpenCL class

main
Brett 2022-12-03 00:39:12 -05:00
parent 5b65167bd7
commit 373134a255
12 changed files with 515 additions and 297 deletions

View File

@ -0,0 +1,73 @@
<component name="ProjectCodeStyleConfiguration">
<code_scheme name="Project" version="173">
<option name="RIGHT_MARGIN" value="150" />
<Objective-C>
<option name="INDENT_CLASS_MEMBERS" value="8" />
<option name="INDENT_VISIBILITY_KEYWORDS" value="4" />
<option name="INDENT_PREPROCESSOR_DIRECTIVE" value="4" />
<option name="FUNCTION_NON_TOP_AFTER_RETURN_TYPE_WRAP" value="0" />
<option name="FUNCTION_TOP_AFTER_RETURN_TYPE_WRAP" value="0" />
<option name="FUNCTION_PARAMETERS_NEW_LINE_AFTER_LPAR" value="true" />
<option name="FUNCTION_PARAMETERS_NEW_LINE_BEFORE_RPAR" value="true" />
<option name="LAMBDA_CAPTURE_LIST_ALIGN_MULTILINE" value="true" />
<option name="STRUCTURED_BINDING_LIST_ALIGN_MULTILINE" value="true" />
<option name="FUNCTION_CALL_ARGUMENTS_NEW_LINE_AFTER_LPAR" value="true" />
<option name="FUNCTION_CALL_ARGUMENTS_NEW_LINE_BEFORE_RPAR" value="true" />
<option name="CLASS_CONSTRUCTOR_INIT_LIST_NEW_LINE_BEFORE_COLON" value="0" />
<option name="CLASS_CONSTRUCTOR_INIT_LIST_NEW_LINE_AFTER_COLON" value="1" />
<option name="SUPERCLASS_LIST_BEFORE_COLON" value="0" />
<option name="SPACE_BEFORE_INIT_LIST_COLON" value="false" />
<option name="SPACE_BEFORE_POINTER_IN_DECLARATION" value="false" />
<option name="SPACE_AFTER_POINTER_IN_DECLARATION" value="true" />
<option name="SPACE_BEFORE_REFERENCE_IN_DECLARATION" value="false" />
<option name="SPACE_AFTER_REFERENCE_IN_DECLARATION" value="true" />
</Objective-C>
<Objective-C-extensions>
<rules>
<rule entity="NAMESPACE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="MACRO" visibility="ANY" specifier="ANY" prefix="" style="SCREAMING_SNAKE_CASE" suffix="" />
<rule entity="CLASS" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
<rule entity="STRUCT" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="ENUM" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="ENUMERATOR" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
<rule entity="TYPEDEF" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="UNION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="CLASS_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="STRUCT_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="CLASS_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="STRUCT_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="GLOBAL_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="GLOBAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="PARAMETER" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="LOCAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
</rules>
</Objective-C-extensions>
<Objective-C-extensions>
<rules>
<rule entity="NAMESPACE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="MACRO" visibility="ANY" specifier="ANY" prefix="" style="SCREAMING_SNAKE_CASE" suffix="" />
<rule entity="CLASS" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
<rule entity="STRUCT" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="ENUM" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="ENUMERATOR" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
<rule entity="TYPEDEF" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="UNION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="CLASS_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="STRUCT_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="CLASS_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="STRUCT_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="GLOBAL_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="GLOBAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="PARAMETER" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
<rule entity="LOCAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
</rules>
</Objective-C-extensions>
<codeStyleSettings language="ObjectiveC">
<option name="RIGHT_MARGIN" value="150" />
<indentOptions>
<option name="LABEL_INDENT_SIZE" value="-4" />
<option name="KEEP_INDENTS_ON_EMPTY_LINES" value="true" />
</indentOptions>
</codeStyleSettings>
</code_scheme>
</component>

View File

@ -1,5 +1,5 @@
<component name="ProjectCodeStyleConfiguration"> <component name="ProjectCodeStyleConfiguration">
<state> <state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" /> <option name="USE_PER_PROJECT_SETTINGS" value="true" />
</state> </state>
</component> </component>

View File

@ -0,0 +1,5 @@
<component name="CopyrightManager">
<copyright>
<option name="myName" value="GPL3" />
</copyright>
</component>

View File

@ -0,0 +1,3 @@
<component name="DependencyValidationManager">
<scope name="GPL_3" pattern="file[Step 2]:*/&amp;&amp;!file[Step 2]:.idea//*&amp;&amp;!file[Step 2]:Old//*&amp;&amp;!file[Step 2]:Old 2//*&amp;&amp;!file[Step 2]:resources//*&amp;&amp;!file:cmake_make_command.sh&amp;&amp;!file:CMakeLists.txt&amp;&amp;!file:Dev Log Week of 2022-10-23.odt&amp;&amp;!file:st3-rd-debug.cap&amp;&amp;!file:st3-rd-release.cap&amp;&amp;!file:Submission.zip" />
</component>

View File

@ -0,0 +1,3 @@
<component name="DependencyValidationManager">
<scope name="GPL_3_LOCAL" pattern="file[Step 2]:*/&amp;&amp;!file[Step 2]:.idea//*&amp;&amp;!file[Step 2]:Old//*&amp;&amp;!file[Step 2]:Old 2//*&amp;&amp;!file[Step 2]:resources//*&amp;&amp;!file:cmake_make_command.sh&amp;&amp;!file:CMakeLists.txt&amp;&amp;!file:Dev Log Week of 2022-10-23.odt&amp;&amp;!file:st3-rd-debug.cap&amp;&amp;!file:st3-rd-release.cap&amp;&amp;!file:Submission.zip" />
</component>

View File

@ -19,13 +19,13 @@
#include <queue> #include <queue>
namespace Raytracing { namespace Raytracing {
class Camera { class Camera {
private: private:
/* Image details */ /* Image details */
const Image image; const Image image;
const PRECISION_TYPE aspectRatio; const PRECISION_TYPE aspectRatio;
/* Camera details */ /* Camera details */
PRECISION_TYPE viewportHeight; PRECISION_TYPE viewportHeight;
PRECISION_TYPE viewportWidth; PRECISION_TYPE viewportWidth;
@ -35,14 +35,14 @@ namespace Raytracing {
const PRECISION_TYPE FAR_PLANE = 500; const PRECISION_TYPE FAR_PLANE = 500;
PRECISION_TYPE tanFovHalf; PRECISION_TYPE tanFovHalf;
PRECISION_TYPE frustumLength; PRECISION_TYPE frustumLength;
Vec4 position{0, 0, 0}; Vec4 position{0, 0, 0};
Vec4 horizontalAxis; Vec4 horizontalAxis;
Vec4 verticalAxis; Vec4 verticalAxis;
Vec4 imageOrigin; Vec4 imageOrigin;
Vec4 up {0, 1, 0}; Vec4 up{0, 1, 0};
public: public:
Camera(PRECISION_TYPE fov, const Image& image): image(image), Camera(PRECISION_TYPE fov, const Image& image): image(image),
aspectRatio(double(image.getWidth()) / double(image.getHeight())) { aspectRatio(double(image.getWidth()) / double(image.getHeight())) {
@ -58,20 +58,20 @@ namespace Raytracing {
verticalAxis = (Vec4{0, viewportHeight, 0, 0}); verticalAxis = (Vec4{0, viewportHeight, 0, 0});
// lower left of the camera's view port. used to project our vectors from image space to world space // lower left of the camera's view port. used to project our vectors from image space to world space
imageOrigin = (position - horizontalAxis / 2 - verticalAxis / 2 - Vec4(0, 0, focalLength, 0)); imageOrigin = (position - horizontalAxis / 2 - verticalAxis / 2 - Vec4(0, 0, focalLength, 0));
tlog << viewportHeight << "\n"; tlog << viewportHeight << "\n";
tlog << viewportWidth << "\n"; tlog << viewportWidth << "\n";
tlog << "\n"; tlog << "\n";
tlog << horizontalAxis << "\n"; tlog << horizontalAxis << "\n";
tlog << verticalAxis << "\n"; tlog << verticalAxis << "\n";
tlog << imageOrigin << "\n"; tlog << imageOrigin << "\n";
} }
Ray projectRay(PRECISION_TYPE x, PRECISION_TYPE y); Ray projectRay(PRECISION_TYPE x, PRECISION_TYPE y);
void setPosition(const Vec4& pos) { this->position = pos; } void setPosition(const Vec4& pos) { this->position = pos; }
void setRotation(PRECISION_TYPE yaw, PRECISION_TYPE pitch); void setRotation(PRECISION_TYPE yaw, PRECISION_TYPE pitch);
// the follow utility functions are actually taking forever to get right // the follow utility functions are actually taking forever to get right
@ -79,7 +79,7 @@ namespace Raytracing {
// got to install GLM to test which function works and which does. Maybe they are both bad. or Maybe it's my matrix impl // got to install GLM to test which function works and which does. Maybe they are both bad. or Maybe it's my matrix impl
// or maybe the whole rendering stack sucks // or maybe the whole rendering stack sucks
[[nodiscard]] Mat4x4 project() const { [[nodiscard]] Mat4x4 project() const {
Mat4x4 project {emptyMatrix}; Mat4x4 project{emptyMatrix};
// this should be all it takes to create a mostly correct projection matrix // this should be all it takes to create a mostly correct projection matrix
project.m00(float(1.0 / (aspectRatio * tanFovHalf))); project.m00(float(1.0 / (aspectRatio * tanFovHalf)));
@ -106,12 +106,12 @@ namespace Raytracing {
view.m01(float(w.y())); view.m01(float(w.y()));
view.m02(float(w.z())); view.m02(float(w.z()));
view.m03(float(w.w())); view.m03(float(w.w()));
view.m10(float(u.x())); view.m10(float(u.x()));
view.m11(float(u.y())); view.m11(float(u.y()));
view.m12(float(u.z())); view.m12(float(u.z()));
view.m13(float(u.w())); view.m13(float(u.w()));
view.m20(float(v.x())); view.m20(float(v.x()));
view.m21(float(v.y())); view.m21(float(v.y()));
view.m22(float(v.z())); view.m22(float(v.z()));
@ -126,31 +126,31 @@ namespace Raytracing {
return view; return view;
} }
Mat4x4 view(PRECISION_TYPE yaw, PRECISION_TYPE pitch); Mat4x4 view(PRECISION_TYPE yaw, PRECISION_TYPE pitch);
[[nodiscard]] inline Vec4 getPosition() const {return position;}; [[nodiscard]] inline Vec4 getPosition() const { return position; };
// the camera's position must be set with setPosition(Vec4); // the camera's position must be set with setPosition(Vec4);
// uses an internal up vector, assumed to be {0, 1, 0} // uses an internal up vector, assumed to be {0, 1, 0}
// will make the camera look at provided position with respects to the current camera position. // will make the camera look at provided position with respects to the current camera position.
void lookAt(const Vec4& lookAtPos); void lookAt(const Vec4& lookAtPos);
}; };
static Random rnd{-1.0, 1.0}; static Random rnd{-1.0, 1.0};
struct RaycasterImageBounds { struct RaycasterImageBounds {
int width,height, x,y; int width, height, x, y;
}; };
class Raycaster { class RayCaster {
private: private:
int maxBounceDepth = 50; int maxBounceDepth = 50;
int raysPerPixel = 50; int raysPerPixel = 50;
Camera& camera; Camera& camera;
Image& image; Image& image;
World& world; World& world;
std::vector<std::unique_ptr<std::thread>> executors {}; std::vector<std::unique_ptr<std::thread>> executors{};
// is the raytracer still running? // is the raytracer still running?
bool stillRunning = true; bool stillRunning = true;
unsigned int finishedThreads = 0; unsigned int finishedThreads = 0;
@ -159,17 +159,17 @@ namespace Raytracing {
// and compared to the actual runtime of the raytracing it's very small! // and compared to the actual runtime of the raytracing it's very small!
std::mutex queueSync; std::mutex queueSync;
std::queue<RaycasterImageBounds>* unprocessedQuads = nullptr; std::queue<RaycasterImageBounds>* unprocessedQuads = nullptr;
Vec4 raycasti(const Ray& ray, int depth); Vec4 raycasti(const Ray& ray, int depth);
Vec4 raycast(const Ray& ray); Vec4 raycast(const Ray& ray);
void runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY); void runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY);
void setupQueue(const std::vector<RaycasterImageBounds>& bounds); void setupQueue(const std::vector<RaycasterImageBounds>& bounds);
public: public:
inline void updateRayInfo(int maxBounce, int perPixel){ inline void updateRayInfo(int maxBounce, int perPixel) {
raysPerPixel = perPixel; raysPerPixel = perPixel;
maxBounceDepth = maxBounce; maxBounceDepth = maxBounce;
} }
inline void resetRayInfo(){ inline void resetRayInfo() {
raysPerPixel = 50; raysPerPixel = 50;
maxBounceDepth = 50; maxBounceDepth = 50;
} }
@ -189,33 +189,33 @@ namespace Raytracing {
// the second creates better results but is 18% slower (better defined shadows) // the second creates better results but is 18% slower (better defined shadows)
// likely due to not over generating unit vectors biased towards the corners // likely due to not over generating unit vectors biased towards the corners
} }
Raycaster(Camera& c, Image& i, World& world, const Parser& p): camera(c), image(i), world(world) { RayCaster(Camera& c, Image& i, World& world, const Parser& p): camera(c), image(i), world(world) {
world.generateBVH(); world.generateBVH();
} }
void runSTDThread(int threads = -1); void runSTDThread(int threads = -1);
void runOpenMP(int threads = -1); void runOpenMP(int threads = -1);
void runMPI(std::queue<RaycasterImageBounds> bounds); void runMPI(std::queue<RaycasterImageBounds> bounds);
[[nodiscard]] inline bool areThreadsStillRunning() const {return finishedThreads == executors.size();} [[nodiscard]] inline bool areThreadsStillRunning() const { return finishedThreads == executors.size(); }
inline void join(){ inline void join() {
for (auto& p : executors) for (auto& p: executors)
p->join(); p->join();
} }
void deleteThreads(){ void deleteThreads() {
for (auto& p : executors){ for (auto& p: executors) {
// wait for all threads to exit before trying to delete them. // wait for all threads to exit before trying to delete them.
try { try {
if (p->joinable()) if (p->joinable())
p->join(); p->join();
} catch (std::exception& e){} } catch (std::exception& e) {}
} }
// since executors contains the only reference to the unique_ptr it will be deleted automatically // since executors contains the only reference to the unique_ptr it will be deleted automatically
executors.clear(); executors.clear();
} }
~Raycaster() { ~RayCaster() {
deleteThreads(); deleteThreads();
delete(unprocessedQuads); delete (unprocessedQuads);
} }
}; };
} }
#endif //STEP_2_RAYTRACING_H #endif //STEP_2_RAYTRACING_H

View File

@ -42,8 +42,8 @@ namespace Raytracing {
void drawQuad(); void drawQuad();
void deleteQuad(); void deleteQuad();
#ifdef USE_GLFW #ifdef USE_GLFW
class XWindow { class XWindow {
private: private:
@ -52,7 +52,7 @@ namespace Raytracing {
bool isCloseRequested = false; bool isCloseRequested = false;
long lastFrameTime{}; long lastFrameTime{};
PRECISION_TYPE delta{}; PRECISION_TYPE delta{};
PRECISION_TYPE frameTimeMs{},frameTimeS{}; PRECISION_TYPE frameTimeMs{}, frameTimeS{};
PRECISION_TYPE fps{}; PRECISION_TYPE fps{};
public: public:
XWindow(int width, int height); XWindow(int width, int height);
@ -62,21 +62,21 @@ namespace Raytracing {
[[nodiscard]] inline bool shouldWindowClose() const { return isCloseRequested; } [[nodiscard]] inline bool shouldWindowClose() const { return isCloseRequested; }
[[nodiscard]] inline PRECISION_TYPE getFrameTimeMillis() const {return frameTimeMs;} [[nodiscard]] inline PRECISION_TYPE getFrameTimeMillis() const { return frameTimeMs; }
[[nodiscard]] inline PRECISION_TYPE getFrameTimeSeconds() const {return frameTimeS;} [[nodiscard]] inline PRECISION_TYPE getFrameTimeSeconds() const { return frameTimeS; }
[[nodiscard]] inline PRECISION_TYPE getFPS() const {return fps;} [[nodiscard]] inline PRECISION_TYPE getFPS() const { return fps; }
void setMouseGrabbed(bool grabbed); void setMouseGrabbed(bool grabbed);
bool isMouseGrabbed(); bool isMouseGrabbed();
[[nodiscard]] inline int displayWidth() const {return m_displayWidth;} [[nodiscard]] inline int displayWidth() const { return m_displayWidth; }
[[nodiscard]] inline int displayHeight() const {return m_displayHeight;} [[nodiscard]] inline int displayHeight() const { return m_displayHeight; }
[[nodiscard]] inline GLFWwindow* getWindow() const {return window;} [[nodiscard]] inline GLFWwindow* getWindow() const { return window; }
void closeWindow(); void closeWindow();
~XWindow(); ~XWindow();
}; };
#else #else
class XWindow { class XWindow {
private: private:
// X11 display itself // X11 display itself
@ -125,7 +125,7 @@ namespace Raytracing {
void closeWindow(); void closeWindow();
~XWindow(); ~XWindow();
}; };
#endif #endif
/** /**
* The display renderer class handles all the major rendering events outside of window functions * The display renderer class handles all the major rendering events outside of window functions
@ -138,7 +138,7 @@ namespace Raytracing {
World& m_world; World& m_world;
Shader& m_imageShader; Shader& m_imageShader;
Shader& m_worldShader; Shader& m_worldShader;
Raycaster& m_raycaster; RayCaster& m_raycaster;
Parser& m_parser; Parser& m_parser;
Camera& m_camera; Camera& m_camera;
public: public:
@ -147,7 +147,7 @@ namespace Raytracing {
World& world, World& world,
Shader& mImageShader, Shader& mImageShader,
Shader& mWorldShader, Shader& mWorldShader,
Raycaster& mRaycaster, RayCaster& mRaycaster,
Parser& mParser, Parser& mParser,
Camera& mCamera) Camera& mCamera)
: m_window(mWindow), m_mainImage(mMainImage), m_imageShader(mImageShader), m_worldShader(mWorldShader), m_raycaster(mRaycaster), : m_window(mWindow), m_mainImage(mMainImage), m_imageShader(mImageShader), m_worldShader(mWorldShader), m_raycaster(mRaycaster),

View File

@ -9,6 +9,7 @@
// OpenCL includes // OpenCL includes
#include <CL/cl.h> #include <CL/cl.h>
#include <CL/cl_gl.h> #include <CL/cl_gl.h>
#include <engine/image/image.h>
#include <config.h> #include <config.h>
#ifdef COMPILE_GUI #ifdef COMPILE_GUI
@ -34,26 +35,146 @@ namespace Raytracing {
std::unordered_map<std::string, cl_mem> buffers; std::unordered_map<std::string, cl_mem> buffers;
std::unordered_map<std::string, cl_kernel> kernels; std::unordered_map<std::string, cl_kernel> kernels;
/**
* Checks for some basic errors after calling OpenCL commands. Stuff like GPU out of memory... etc.
*/
void checkBasicErrors() const; void checkBasicErrors() const;
public: public:
/**
* Loads the shader from a file on class creation
* @param file file to load OpenCL "shader" (code) file
*/
explicit CLProgram(const std::string& file); explicit CLProgram(const std::string& file);
/**
* Used by the OpenCL class to create a basic OpenCL program
* @param context provided by the OpenCL class.
* @param deviceID provided by the OpenCL class.
*/
void loadCLShader(cl_context context, cl_device_id deviceID); void loadCLShader(cl_context context, cl_device_id deviceID);
/**
* Kernels are the entry points in OpenCL. You can have multiple of them in a single program.
* @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class.
*/
void createKernel(const std::string& kernelName); void createKernel(const std::string& kernelName);
/**
* Buffers are the quintessential datastructures in OpenCL. They are basically regions of memory allocated to a program.
* @param bufferName the name of the buffer used to store internally
* @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
* @param bytes the number of bytes to be allocated.
*/
void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes); void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes);
/**
* Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
* @param bufferName the name of the buffer used to store internally
* @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
* @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
* @param ptr the pointer to copy to the GPU.
*/
void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr); void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr);
/**
* Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
* @param bufferName the name of the buffer used to store internally
* @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
* @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
* @param ptr the pointer to copy to the GPU.
*/
void createImage(const std::string& imageName, int width, int height); void createImage(const std::string& imageName, int width, int height);
/**
* Allows you to bind certain buffers to a specific index in the kernel's argument list.
* @param kernel kernel to bind to
* @param buffer buffer to bind to argIndex
* @param argIndex the index of the argument for this buffer.
*/
void setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex); void setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex);
/**
* Runs the kernel code on the GPU. Is blocking.
* @param kernel kernel function name to call
* @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id
* @param localWorkSize how many work items make up a work group to be executed by a kernel. 64 is recommended, must not exceed the printed value "device max workgroup size"
* @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id
*/
void runKernel(const std::string& kernel, size_t globalWorkSize, size_t localWorkSize, const size_t* globalWorkOffset = NULL); void runKernel(const std::string& kernel, size_t globalWorkSize, size_t localWorkSize, const size_t* globalWorkOffset = NULL);
void runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1, const size_t* globalWorkOffset = NULL);
/**
* Runs the kernel code on the GPU. Is blocking.
* This version allows you to specify the number of work dimensions.
* globalWorkSize and localWorkSize must be an array of workDim size which specify the work size for each kernel
* For example a work dim of 2 allows for two separate work sizes to be set per dimension.
* An image is two dimensional and so global work size would be {width of image, height of image}
* and local work size would be {8, 8} for a total of 64 (again recommended). Alternatively specify CL_D2_64_LOCAL_SIZE
* The resulting execution causes get_global_id(0) to run [0, width) times and get_global_id(1) to run [0, height) times
* @param kernel kernel function name to call
* @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id(dim)
* @param localWorkSize how many work items make up a work group to be executed by a kernel. total 64 is recommended, total must not exceed the printed value "device max workgroup size"
* @param workDim number of dimensions to the work group being executed.
* @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id
*/
void runKernel(
const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1,
const size_t* globalWorkOffset = NULL
);
/**
* Enqueues a write command to the buffer specified by the buffer name,
* @param buffer the buffer to write to
* @param bytes the number of bytes to be copied
* @param ptr the pointer to copy from. Must have at least bytes available
* @param blocking should this function wait for the bytes to be uploaded to the GPU?
* @param offset offset in the buffer object to write to
*/
void writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0); void writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
void readImage(const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0);
/**
* Enqueues a read command from the buffered specified by the buffer name.
* Defaults to blocking but can be set to be non-blocking.
* @param buffer buffer to read from
* @param bytes the number of bytes to read. Make sure ptr has at least those bytes available.
* @param ptr the ptr to write the read bytes to.
* @param blocking should we wait for the read or do it async?
* @param offset offset in the buffer to read from.
*/
void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
/**
* Reads an image from the GPU into the memory region specified. Allocated memory region must be large enough to hold the image.
* @param imageName name of the buffer to read from
* @param width width of the image. Must be less than or equal to the width of the image on the GPU
* @param height height of the image. Also must be less than or equal to the height of the image on the GPU
* @param ptr pointer to the memory region to read into
* @param blocking should we wait for the read operation to complete? Defaults to yes.
* @param x x coordinate to start the read from. Defaults to zero since it's unlikely to be needed here. Included for possible future use.
* @param y y coordinate to start the read from.
*/
void readImage(
const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0
);
/**
* Reads an image buffer into a RayCasting Image class.
* Image supplied must have a with and height that matches the width and height of the image buffer specified by the name.
* @param imageName name of the buffer you wish to read from
* @param image reference to an image that you want the GPU data read into.
*/
void readImage(const std::string& imageName, Image& image);
/**
* Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
*/
void flushCommands(); void flushCommands();
/**
* Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
*/
void finishCommands(); void finishCommands();
~CLProgram(); ~CLProgram();
@ -76,17 +197,43 @@ namespace Raytracing {
cl_context m_context; cl_context m_context;
/**
* prints out the important info about the specified device.
* @param device device to data dump
*/
void printDeviceInfo(cl_device_id device); void printDeviceInfo(cl_device_id device);
public:
explicit OpenCL(int platformID = 0, int deviceID = 0);
static void init();
static void createCLProgram(CLProgram& program);
static cl_uint activeDeviceComputeUnits();
static cl_uint activeDeviceFrequency();
public:
/**
* creates an opencl instance on the specified platform and device. Defaults to the first GPU device
*/
explicit OpenCL(int platformID = 0, int deviceID = 0);
/**
* Creates the global OpenCL instance for the engine
*/
static void init();
/**
* Creates an OpenCL program object using the global OpenCL connection
* @param program
*/
static void createCLProgram(CLProgram& program);
/**
* @return the number of compute units the device has
*/
static cl_uint activeDeviceComputeUnits();
/**
* the frequency in megahertz of the device
* @return
*/
static cl_uint activeDeviceFrequency();
~OpenCL(); ~OpenCL();
}; };
} }
#endif //STEP_3_CL_H #endif //STEP_3_CL_H

View File

@ -9,24 +9,24 @@
#include <csignal> #include <csignal>
#ifdef USE_MPI #ifdef USE_MPI
#include <engine/mpi.h> #include <engine/mpi.h>
#endif #endif
//#include <sys/time.h> //#include <sys/time.h>
//#include <sys/resource.h> //#include <sys/resource.h>
#ifdef COMPILE_GUI #ifdef COMPILE_GUI
#include <graphics/graphics.h> #include <graphics/graphics.h>
#include <graphics/gl/gl.h> #include <graphics/gl/gl.h>
#include <graphics/gl/shader.h> #include <graphics/gl/shader.h>
#endif #endif
#ifdef COMPILE_OPENCL #ifdef COMPILE_OPENCL
#include <opencl/cl.h> #include <opencl/cl.h>
#endif #endif
@ -81,10 +81,10 @@ int main(int argc, char** args) {
"\tSets the directory where the resources are stored.\n" "\tSets the directory where the resources are stored.\n"
"\tThis can be relative.Must have trailing '/' \n", "../resources/"); "\tThis can be relative.Must have trailing '/' \n", "../resources/");
parser.addOption("--mpi", "Use OpenMPI\n" parser.addOption("--mpi", "Use OpenMPI\n"
"\tTells the raycaster to use OpenMPI to run the raycaster algorithm\n"); "\tTells the raycaster to use OpenMPI to run the raycaster algorithm\n");
parser.addOption("--openmp", "Use OpenMP\n" parser.addOption("--openmp", "Use OpenMP\n"
"\tTells the raycaster to use OpenMP to run the raycaster algorithm\n"); "\tTells the raycaster to use OpenMP to run the raycaster algorithm\n");
// disabled because don't currently have a way to parse vectors. TODO // disabled because don't currently have a way to parse vectors. TODO
//parser.addOption("--position", "Camera Position\n\tSets the position used to render the scene with the camera.\n", "{0, 0, 0}"); //parser.addOption("--position", "Camera Position\n\tSets the position used to render the scene with the camera.\n", "{0, 0, 0}");
@ -111,22 +111,22 @@ int main(int argc, char** args) {
tlog << "Parsing complete! Starting raytracer with options:" << std::endl; tlog << "Parsing complete! Starting raytracer with options:" << std::endl;
// not perfect (contains duplicates) but good enough. // not perfect (contains duplicates) but good enough.
parser.printAllInInfo(); parser.printAllInInfo();
#ifdef USE_MPI #ifdef USE_MPI
Raytracing::MPI::init(argc, args); Raytracing::MPI::init(argc, args);
#endif #endif
#ifdef COMPILE_GUI #ifdef COMPILE_GUI
XWindow* window; XWindow* window;
if (parser.hasOption("--gui") || parser.hasOption("-g")) if (parser.hasOption("--gui") || parser.hasOption("-g"))
window = new XWindow(1440, 720); window = new XWindow(1440, 720);
Shader worldShader("../resources/shaders/world.vs", "../resources/shaders/world.fs"); Shader worldShader("../resources/shaders/world.vs", "../resources/shaders/world.fs");
#endif #endif
#ifdef COMPILE_OPENCL #ifdef COMPILE_OPENCL
OpenCL::init(); OpenCL::init();
#endif #endif
Raytracing::Image image(1440, 720); Raytracing::Image image(1440, 720);
//Raytracing::Image image(std::stoi(parser.getOptionValue("-w")), std::stoi(parser.getOptionValue("-h"))); //Raytracing::Image image(std::stoi(parser.getOptionValue("-w")), std::stoi(parser.getOptionValue("-h")));
@ -135,13 +135,13 @@ int main(int argc, char** args) {
//camera.setPosition({0, 0, 1}); //camera.setPosition({0, 0, 1});
camera.setPosition({6, 5, 6}); camera.setPosition({6, 5, 6});
camera.lookAt({0, 0, 0}); camera.lookAt({0, 0, 0});
#ifdef COMPILE_GUI #ifdef COMPILE_GUI
WorldConfig worldConfig{worldShader}; WorldConfig worldConfig{worldShader};
#else #else
WorldConfig worldConfig; WorldConfig worldConfig;
#endif #endif
worldConfig.useBVH = true; worldConfig.useBVH = true;
Raytracing::World world{worldConfig}; Raytracing::World world{worldConfig};
@ -175,10 +175,10 @@ int main(int argc, char** args) {
//world.add(new Raytracing::ModelObject({0, 0, 0}, debugCube, world.getMaterial("cat"))); //world.add(new Raytracing::ModelObject({0, 0, 0}, debugCube, world.getMaterial("cat")));
if (parser.hasOption("--gui") || parser.hasOption("-g")) { if (parser.hasOption("--gui") || parser.hasOption("-g")) {
#ifdef COMPILE_GUI #ifdef COMPILE_GUI
Raytracing::Raycaster raycaster{camera, image, world, parser}; Raytracing::RayCaster rayCaster{camera, image, world, parser};
Texture mainImage(&image); Texture mainImage(&image);
CLProgram program(parser.getOptionValue("--resources") + "opencl/image.cl"); CLProgram program(parser.getOptionValue("--resources") + "opencl/image.cl");
OpenCL::createCLProgram(program); OpenCL::createCLProgram(program);
program.createKernel("drawImage"); program.createKernel("drawImage");
@ -186,52 +186,42 @@ int main(int argc, char** args) {
program.setKernelArgument("drawImage", "mainImage", 0); program.setKernelArgument("drawImage", "mainImage", 0);
program.setKernelArgument("drawImage", "mainImage", 1); program.setKernelArgument("drawImage", "mainImage", 1);
size_t works[2] {(size_t) image.getWidth(), (size_t) image.getHeight()}; size_t works[2]{(size_t) image.getWidth(), (size_t) image.getHeight()};
size_t localWorks[2] {8, 8}; size_t localWorks[2]{8, 8};
unsigned char bytes[image.getWidth() * image.getHeight() * 4];
for (int i = 0; i < image.getWidth() * image.getHeight() * 4; i++)
bytes[i] = 0;
Shader shader("../resources/shaders/basic.vs", "../resources/shaders/basic.fs"); Shader shader("../resources/shaders/basic.vs", "../resources/shaders/basic.fs");
Raytracing::DisplayRenderer renderer{*window, mainImage, world, shader, worldShader, raycaster, parser, camera}; Raytracing::DisplayRenderer renderer{*window, mainImage, world, shader, worldShader, rayCaster, parser, camera};
while (!window->shouldWindowClose()) { while (!window->shouldWindowClose()) {
window->beginUpdate(); window->beginUpdate();
renderer.draw(); renderer.draw();
program.runKernel("drawImage", works, localWorks, 2); program.runKernel("drawImage", works, localWorks, 2);
program.readImage("mainImage", image.getWidth(), image.getHeight(), bytes); program.readImage("mainImage", image);
const PRECISION_TYPE colorFactor = 1.0 / 255.0;
for (int i = 0; i < image.getWidth(); i++){
for (int j = 0; j < image.getHeight(); j++){
const auto pixelData = bytes + (j * 4 * image.getWidth() + i * 4);
//tlog << (int)pixelData[0] << " " << (int)pixelData[1] << " " << (int)pixelData[2] << "\n";
image.setPixelColor(i, j, {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor});
}
}
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
world.drawBVH(worldShader); world.drawBVH(worldShader);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
window->endUpdate(); window->endUpdate();
} }
RTSignal->haltExecution = true; RTSignal->haltExecution = true;
raycaster.join(); rayCaster.join();
#else #else
flog << "Program not compiled with GUI support! Unable to open GUI\n"; flog << "Program not compiled with GUI support! Unable to open GUI\n";
#endif #endif
} else { } else {
Raytracing::Raycaster rayCaster{camera, image, world, parser}; Raytracing::RayCaster rayCaster{camera, image, world, parser};
ilog << "Running RayCaster (NO_GUI)!\n"; ilog << "Running RayCaster (NO_GUI)!\n";
// we don't actually have to check for --single since it's implied to be default true. // we don't actually have to check for --single since it's implied to be default true.
int threads = std::stoi(parser.getOptionValue("--threads")); int threads = std::stoi(parser.getOptionValue("--threads"));
if (parser.hasOption("--mpi")) { if (parser.hasOption("--mpi")) {
// We need to make sure that if the user requests that MPI be run while not having MPI compiled, they get a helpful error warning. // We need to make sure that if the user requests that MPI be run while not having MPI compiled, they get a helpful error warning.
#ifdef USE_MPI #ifdef USE_MPI
rayCaster.runMPI(Raytracing::MPI::getCurrentImageRegionAssociation(rayCaster)); rayCaster.runMPI(Raytracing::MPI::getCurrentImageRegionAssociation(rayCaster));
#else #else
flog << "Unable to run with MPI, CMake not set to compile MPI!\n"; flog << "Unable to run with MPI, CMake not set to compile MPI!\n";
return 33; return 33;
#endif #endif
} else if(parser.hasOption("--openmp")){ } else if (parser.hasOption("--openmp")) {
rayCaster.runOpenMP(threads); rayCaster.runOpenMP(threads);
} else { } else {
rayCaster.runSTDThread(threads); rayCaster.runSTDThread(threads);
@ -258,39 +248,39 @@ int main(int argc, char** args) {
image.fromArray(buffer, doubleSize, i); image.fromArray(buffer, doubleSize, i);
} }
#endif #endif
// write the image to the file // write the image to the file
Raytracing::ImageOutput imageOutput(image); Raytracing::ImageOutput imageOutput(image);
auto t = std::time(nullptr); auto t = std::time(nullptr);
auto now = std::localtime(&t); auto now = std::localtime(&t);
std::stringstream timeString; std::stringstream timeString;
timeString << (1900 + now->tm_year); timeString << (1900 + now->tm_year);
timeString << "-"; timeString << "-";
timeString << (1 + now->tm_mon); timeString << (1 + now->tm_mon);
timeString << "-"; timeString << "-";
timeString << now->tm_mday; timeString << now->tm_mday;
timeString << " "; timeString << " ";
timeString << now->tm_hour; timeString << now->tm_hour;
timeString << ":"; timeString << ":";
timeString << now->tm_min; timeString << now->tm_min;
timeString << ":"; timeString << ":";
timeString << now->tm_sec; timeString << now->tm_sec;
ilog << "Writing Image!\n"; ilog << "Writing Image!\n";
imageOutput.write(parser.getOptionValue("--output") + timeString.str(), parser.getOptionValue("--format")); imageOutput.write(parser.getOptionValue("--output") + timeString.str(), parser.getOptionValue("--format"));
#ifdef USE_MPI #ifdef USE_MPI
} }
// wait for all processes to finish sending and receiving before we exit all of them. // wait for all processes to finish sending and receiving before we exit all of them.
MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD);
#endif #endif
delete (RTSignal); delete (RTSignal);
#ifdef COMPILE_GUI #ifdef COMPILE_GUI
deleteQuad(); deleteQuad();
#endif #endif
#ifdef USE_MPI #ifdef USE_MPI
MPI_Finalize(); MPI_Finalize();
#endif #endif
tlog << "Goodbye!\n"; tlog << "Goodbye!\n";
return 0; return 0;
} }

View File

@ -44,21 +44,21 @@ namespace Raytracing {
void Camera::setRotation(const PRECISION_TYPE yaw, const PRECISION_TYPE pitch) { void Camera::setRotation(const PRECISION_TYPE yaw, const PRECISION_TYPE pitch) {
// TODO: // TODO:
} }
Mat4x4 Camera::view(PRECISION_TYPE yaw, PRECISION_TYPE pitch) { Mat4x4 Camera::view(PRECISION_TYPE yaw, PRECISION_TYPE pitch) {
Mat4x4 view; Mat4x4 view;
pitch = degreeeToRadian(pitch); pitch = degreeeToRadian(pitch);
yaw = degreeeToRadian(yaw); yaw = degreeeToRadian(yaw);
PRECISION_TYPE cosPitch = std::cos(pitch); PRECISION_TYPE cosPitch = std::cos(pitch);
PRECISION_TYPE cosYaw = std::cos(yaw); PRECISION_TYPE cosYaw = std::cos(yaw);
PRECISION_TYPE sinPitch = std::sin(pitch); PRECISION_TYPE sinPitch = std::sin(pitch);
PRECISION_TYPE sinYaw = std::sin(yaw); PRECISION_TYPE sinYaw = std::sin(yaw);
auto x = Vec4{cosYaw, 0, -sinYaw}; // forward auto x = Vec4{cosYaw, 0, -sinYaw}; // forward
auto y = Vec4{sinYaw * sinPitch, cosPitch, cosYaw * sinPitch}; // right auto y = Vec4{sinYaw * sinPitch, cosPitch, cosYaw * sinPitch}; // right
auto z = Vec4{sinYaw * cosPitch, -sinPitch, cosPitch * cosYaw}; // up auto z = Vec4{sinYaw * cosPitch, -sinPitch, cosPitch * cosYaw}; // up
// we can actually take those x, y, z vectors and use them to compute the raytracer camera settings // we can actually take those x, y, z vectors and use them to compute the raytracer camera settings
viewportHeight = 2 * tanFovHalf; viewportHeight = 2 * tanFovHalf;
viewportWidth = aspectRatio * viewportHeight; viewportWidth = aspectRatio * viewportHeight;
@ -71,23 +71,23 @@ namespace Raytracing {
view.m01(float(x.y())); view.m01(float(x.y()));
view.m02(float(x.z())); view.m02(float(x.z()));
view.m03(float(x.w())); view.m03(float(x.w()));
view.m10(float(y.x())); view.m10(float(y.x()));
view.m11(float(y.y())); view.m11(float(y.y()));
view.m12(float(y.z())); view.m12(float(y.z()));
view.m13(float(y.w())); view.m13(float(y.w()));
view.m20(float(z.x())); view.m20(float(z.x()));
view.m21(float(z.y())); view.m21(float(z.y()));
view.m22(float(z.z())); view.m22(float(z.z()));
view.m23(float(z.w())); view.m23(float(z.w()));
// view matrix are inverted, dot product to simulate translate matrix multiplication // view matrix are inverted, dot product to simulate translate matrix multiplication
view.m03(-float(Vec4::dot(x, position))); view.m03(-float(Vec4::dot(x, position)));
view.m13(-float(Vec4::dot(y, position))); view.m13(-float(Vec4::dot(y, position)));
view.m23(-float(Vec4::dot(z, position))); view.m23(-float(Vec4::dot(z, position)));
view.m33(1); view.m33(1);
return view; return view;
} }
@ -97,14 +97,14 @@ namespace Raytracing {
Vec4 color; Vec4 color;
}; };
Vec4 Raycaster::raycasti(const Ray& ray, int depth){ Vec4 RayCaster::raycasti(const Ray& ray, int depth) {
return {}; return {};
} }
Vec4 Raycaster::raycast(const Ray& ray) { Vec4 RayCaster::raycast(const Ray& ray) {
Ray localRay = ray; Ray localRay = ray;
Vec4 color {1.0, 1.0, 1.0}; Vec4 color{1.0, 1.0, 1.0};
for (int CURRENT_BOUNCE = 0; CURRENT_BOUNCE < maxBounceDepth; CURRENT_BOUNCE++){ for (int CURRENT_BOUNCE = 0; CURRENT_BOUNCE < maxBounceDepth; CURRENT_BOUNCE++) {
if (RTSignal->haltExecution || RTSignal->haltRaytracing) if (RTSignal->haltExecution || RTSignal->haltRaytracing)
return color; return color;
while (RTSignal->pauseRaytracing) // sleep for 1/60th of a second, or about 1 frame. while (RTSignal->pauseRaytracing) // sleep for 1/60th of a second, or about 1 frame.
@ -138,8 +138,8 @@ namespace Raytracing {
return color; return color;
} }
void Raycaster::runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY) { void RayCaster::runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY) {
try { try {
int x = imageBounds.x + loopX; int x = imageBounds.x + loopX;
int y = imageBounds.y + loopY; int y = imageBounds.y + loopY;
@ -160,9 +160,9 @@ namespace Raytracing {
flog << error.what() << "\n"; flog << error.what() << "\n";
} }
} }
void Raycaster::runSTDThread(int threads){ void RayCaster::runSTDThread(int threads) {
setupQueue(partitionScreen(threads)); setupQueue(partitionScreen(threads));
ilog << "Running std::thread\n"; ilog << "Running std::thread\n";
for (int i = 0; i < threads; i++) { for (int i = 0; i < threads; i++) {
@ -170,7 +170,7 @@ namespace Raytracing {
// run through all the quadrants // run through all the quadrants
std::stringstream str; std::stringstream str;
str << "Threading of #"; str << "Threading of #";
str << (i+1); str << (i + 1);
profiler::start("Raytracer Results", str.str()); profiler::start("Raytracer Results", str.str());
while (unprocessedQuads != nullptr) { while (unprocessedQuads != nullptr) {
RaycasterImageBounds imageBoundingData{}; RaycasterImageBounds imageBoundingData{};
@ -195,12 +195,12 @@ namespace Raytracing {
})); }));
} }
} }
void Raycaster::runOpenMP(int threads){ void RayCaster::runOpenMP(int threads) {
setupQueue(partitionScreen(threads)); setupQueue(partitionScreen(threads));
#ifdef USE_OPENMP #ifdef USE_OPENMP
ilog << "Running OpenMP\n"; ilog << "Running OpenMP\n";
#pragma omp parallel num_threads(threads+1) default(none) shared(threads) #pragma omp parallel num_threads(threads+1) default(none) shared(threads)
{ {
int threadID = omp_get_thread_num(); int threadID = omp_get_thread_num();
// an attempt at making the omp command non-blocking. // an attempt at making the omp command non-blocking.
@ -214,7 +214,7 @@ namespace Raytracing {
bool running = true; bool running = true;
while (running) { while (running) {
RaycasterImageBounds imageBoundingData{}; RaycasterImageBounds imageBoundingData{};
#pragma omp critical #pragma omp critical
{ {
if (unprocessedQuads->empty()) if (unprocessedQuads->empty())
running = false; running = false;
@ -231,18 +231,18 @@ namespace Raytracing {
} }
} }
} }
#pragma omp critical #pragma omp critical
finishedThreads++; finishedThreads++;
profiler::end("Raytracer Results", str.str()); profiler::end("Raytracer Results", str.str());
} }
} }
tlog << "OpenMP finished!\n"; tlog << "OpenMP finished!\n";
#else #else
flog << "Not compiled with OpenMP! Unable to run raytracing.\n"; flog << "Not compiled with OpenMP! Unable to run raytracing.\n";
system_threads; system_threads;
#endif #endif
} }
void Raycaster::runMPI(std::queue<RaycasterImageBounds> bounds){ void RayCaster::runMPI(std::queue<RaycasterImageBounds> bounds) {
ilog << "Running MPI\n"; ilog << "Running MPI\n";
dlog << "We have " << bounds.size() << " bounds currently pending!\n"; dlog << "We have " << bounds.size() << " bounds currently pending!\n";
while (!bounds.empty()) { while (!bounds.empty()) {
@ -254,17 +254,17 @@ namespace Raytracing {
} }
bounds.pop(); bounds.pop();
} }
#ifdef USE_MPI #ifdef USE_MPI
dlog << "Finished running MPI on " << currentProcessID << "\n"; dlog << "Finished running MPI on " << currentProcessID << "\n";
#endif #endif
} }
std::vector<RaycasterImageBounds> Raycaster::partitionScreen(int threads) { std::vector<RaycasterImageBounds> RayCaster::partitionScreen(int threads) {
// if we are running single threaded, disable everything special // if we are running single threaded, disable everything special
// the reason we run single threaded in a seperate thread is because the GUI requires its own set of updating commands // the reason we run single threaded in a seperate thread is because the GUI requires its own set of updating commands
// which cannot be blocked by the raytracer, otherwise it would become unresponsive. // which cannot be blocked by the raytracer, otherwise it would become unresponsive.
int divs = 1; int divs = 1;
if (threads < 0 || threads == 1){ if (threads < 0 || threads == 1) {
threads = 1; threads = 1;
divs = 1; divs = 1;
} else { } else {
@ -275,30 +275,30 @@ namespace Raytracing {
// to do it without a queue like this leads to most threads finishing and a single thread being the critical path which isn't optimally efficient. // to do it without a queue like this leads to most threads finishing and a single thread being the critical path which isn't optimally efficient.
divs = int(std::log(threads) / std::log(2)) * 4; divs = int(std::log(threads) / std::log(2)) * 4;
} }
ilog << "Generating multithreaded raytracer with " << threads << " threads and " << divs << " divisions! \n"; ilog << "Generating multithreaded raytracer with " << threads << " threads and " << divs << " divisions! \n";
std::vector<RaycasterImageBounds> bounds; std::vector<RaycasterImageBounds> bounds;
// we need to subdivide the image for the threads, since this is really quick it's fine to due sequentially // we need to subdivide the image for the threads, since this is really quick it's fine to due sequentially
for (int dx = 0; dx < divs; dx++) { for (int dx = 0; dx < divs; dx++) {
for (int dy = 0; dy < divs; dy++) { for (int dy = 0; dy < divs; dy++) {
bounds.push_back({ bounds.push_back({
image.getWidth() / divs, image.getWidth() / divs,
image.getHeight() / divs, image.getHeight() / divs,
(image.getWidth() / divs) * dx, (image.getWidth() / divs) * dx,
(image.getHeight() / divs) * dy (image.getHeight() / divs) * dy
}); });
} }
} }
return bounds; return bounds;
} }
void Raycaster::setupQueue(const std::vector<RaycasterImageBounds>& bounds) { void RayCaster::setupQueue(const std::vector<RaycasterImageBounds>& bounds) {
delete(unprocessedQuads); delete (unprocessedQuads);
unprocessedQuads = new std::queue<RaycasterImageBounds>(); unprocessedQuads = new std::queue<RaycasterImageBounds>();
for (auto& b : bounds) for (auto& b: bounds)
unprocessedQuads->push(b); unprocessedQuads->push(b);
} }
} }

View File

@ -7,7 +7,7 @@
#include "engine/image/stb_image.h" #include "engine/image/stb_image.h"
namespace Raytracing { namespace Raytracing {
World::~World() { World::~World() {
for (auto* p: objects) for (auto* p: objects)
delete (p); delete (p);
@ -15,7 +15,7 @@ namespace Raytracing {
delete (p.second); delete (p.second);
//delete(bvhObjects); //delete(bvhObjects);
} }
HitData SphereObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const { HitData SphereObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const {
PRECISION_TYPE radiusSquared = radius * radius; PRECISION_TYPE radiusSquared = radius * radius;
// move the ray to be with respects to the sphere // move the ray to be with respects to the sphere
@ -28,11 +28,11 @@ namespace Raytracing {
// = 0: the ray has one root, we hit the edge of the sphere // = 0: the ray has one root, we hit the edge of the sphere
// < 0: ray isn't inside the sphere. // < 0: ray isn't inside the sphere.
PRECISION_TYPE discriminant = b * b - (a * c); PRECISION_TYPE discriminant = b * b - (a * c);
// < 0: ray isn't inside the sphere. Don't need to bother calculating the roots. // < 0: ray isn't inside the sphere. Don't need to bother calculating the roots.
if (discriminant < 0) if (discriminant < 0)
return {false, Vec4(), Vec4(), 0}; return {false, Vec4(), Vec4(), 0};
// now we have to find the root which exists inside our range [min,max] // now we have to find the root which exists inside our range [min,max]
auto root = (-b - std::sqrt(discriminant)) / a; auto root = (-b - std::sqrt(discriminant)) / a;
// if the first root isn't in our range // if the first root isn't in our range
@ -48,7 +48,7 @@ namespace Raytracing {
auto RayAtRoot = ray.along(root); auto RayAtRoot = ray.along(root);
// The normal of a sphere is just the point of the hit minus the center position // The normal of a sphere is just the point of the hit minus the center position
auto normal = (RayAtRoot - position) / radius; auto normal = (RayAtRoot - position) / radius;
/*if (Raytracing::vec4::dot(ray.getDirection(), normal) > 0.0) { /*if (Raytracing::vec4::dot(ray.getDirection(), normal) > 0.0) {
tlog << "ray inside sphere\n"; tlog << "ray inside sphere\n";
} else } else
@ -60,16 +60,16 @@ namespace Raytracing {
// have to invert the v since we have to invert the v again later due to triangles // have to invert the v since we have to invert the v again later due to triangles
return {true, RayAtRoot, normal, root, u, 1.0 - v}; return {true, RayAtRoot, normal, root, u, 1.0 - v};
} }
std::pair<HitData, Object*> World::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const { std::pair<HitData, Object*> World::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const {
// actually speeds up rendering by about 110,000ms (total across 16 threads) // actually speeds up rendering by about 110,000ms (total across 16 threads)
if (bvhObjects != nullptr && m_config.useBVH){ if (bvhObjects != nullptr && m_config.useBVH) {
auto hResult = HitData{false, Vec4(), Vec4(), max}; auto hResult = HitData{false, Vec4(), Vec4(), max};
Object* objPtr = nullptr; Object* objPtr = nullptr;
auto intersected = bvhObjects->rayAnyHitIntersect(ray, min, max); auto intersected = bvhObjects->rayAnyHitIntersect(ray, min, max);
for (const auto& ptr : intersected) { for (const auto& ptr: intersected) {
auto cResult = ptr.ptr->checkIfHit(ray, min, hResult.length); auto cResult = ptr.ptr->checkIfHit(ray, min, hResult.length);
if (cResult.hit) { if (cResult.hit) {
hResult = cResult; hResult = cResult;
@ -106,21 +106,21 @@ namespace Raytracing {
return {hResult, objPtr}; return {hResult, objPtr};
} }
} }
void World::generateBVH() { void World::generateBVH() {
bvhObjects = std::make_unique<BVHTree>(objects); bvhObjects = std::make_unique<BVHTree>(objects);
} }
ScatterResults DiffuseMaterial::scatter(const Ray& ray, const HitData& hitData) const { ScatterResults DiffuseMaterial::scatter(const Ray& ray, const HitData& hitData) const {
Vec4 newRay = hitData.normal + Raytracing::Raycaster::randomUnitVector().normalize(); Vec4 newRay = hitData.normal + Raytracing::RayCaster::randomUnitVector().normalize();
// rays that are close to zero are liable to floating point precision errors // rays that are close to zero are liable to floating point precision errors
if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON) if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON)
newRay = hitData.normal; newRay = hitData.normal;
return {true, Ray{hitData.hitPoint, newRay}, getBaseColor()}; return {true, Ray{hitData.hitPoint, newRay}, getBaseColor()};
} }
ScatterResults MetalMaterial::scatter(const Ray& ray, const HitData& hitData) const { ScatterResults MetalMaterial::scatter(const Ray& ray, const HitData& hitData) const {
// create a ray reflection // create a ray reflection
Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal); Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal);
@ -128,22 +128,22 @@ namespace Raytracing {
bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0; bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0;
return {shouldReflect, Ray{hitData.hitPoint, newRay}, getBaseColor()}; return {shouldReflect, Ray{hitData.hitPoint, newRay}, getBaseColor()};
} }
ScatterResults BrushedMetalMaterial::scatter(const Ray& ray, const HitData& hitData) const { ScatterResults BrushedMetalMaterial::scatter(const Ray& ray, const HitData& hitData) const {
// create a ray reflection // create a ray reflection
Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal); Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal);
// make sure our reflected ray is outside the sphere and doesn't point inwards // make sure our reflected ray is outside the sphere and doesn't point inwards
bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0; bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0;
return {shouldReflect, Ray{hitData.hitPoint, newRay + Raycaster::randomUnitVector() * fuzzyness}, getBaseColor()}; return {shouldReflect, Ray{hitData.hitPoint, newRay + RayCaster::randomUnitVector() * fuzzyness}, getBaseColor()};
} }
ScatterResults TexturedMaterial::scatter(const Ray& ray, const HitData& hitData) const { ScatterResults TexturedMaterial::scatter(const Ray& ray, const HitData& hitData) const {
Vec4 newRay = hitData.normal + Raytracing::Raycaster::randomUnitVector().normalize(); Vec4 newRay = hitData.normal + Raytracing::RayCaster::randomUnitVector().normalize();
// rays that are close to zero are liable to floating point precision errors // rays that are close to zero are liable to floating point precision errors
if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON) if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON)
newRay = hitData.normal; newRay = hitData.normal;
return {true, Ray{hitData.hitPoint, newRay}, getColor(hitData.u, hitData.v)}; return {true, Ray{hitData.hitPoint, newRay}, getColor(hitData.u, hitData.v)};
} }
Vec4 TexturedMaterial::getColor(PRECISION_TYPE u, PRECISION_TYPE v) const { Vec4 TexturedMaterial::getColor(PRECISION_TYPE u, PRECISION_TYPE v) const {
@ -155,11 +155,11 @@ namespace Raytracing {
// fix that pesky issue of the v being rotated 90* compared to the image // fix that pesky issue of the v being rotated 90* compared to the image
v = 1.0 - clamp(v, 0.0, 1.0); v = 1.0 - clamp(v, 0.0, 1.0);
auto imageX = (int)(width * u); auto imageX = (int) (width * u);
auto imageY = (int)(height * v); auto imageY = (int) (height * v);
if (imageX >= width) imageX = width-1; if (imageX >= width) imageX = width - 1;
if (imageY >= height) imageY = height-1; if (imageY >= height) imageY = height - 1;
// since stbi loads in RGB8 [0, 255] but the engine works on [0, 1] we need to scale the data down. // since stbi loads in RGB8 [0, 255] but the engine works on [0, 1] we need to scale the data down.
// this is best done with a single division followed by multiple multiplication. // this is best done with a single division followed by multiple multiplication.
@ -169,7 +169,7 @@ namespace Raytracing {
return {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor}; return {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor};
} }
TexturedMaterial::TexturedMaterial(const std::string& file) : Material({}) { TexturedMaterial::TexturedMaterial(const std::string& file): Material({}) {
// we are going to have to ignore transparency for now. TODO:? // we are going to have to ignore transparency for now. TODO:?
data = stbi_load(file.c_str(), &width, &height, &channels, 0); data = stbi_load(file.c_str(), &width, &height, &channels, 0);
if (!data) if (!data)
@ -188,7 +188,7 @@ namespace Raytracing {
return baseColor; return baseColor;
} }
PRECISION_TYPE sign(PRECISION_TYPE i){ PRECISION_TYPE sign(PRECISION_TYPE i) {
return i >= 0 ? 1 : -1; return i >= 0 ? 1 : -1;
} }
@ -200,25 +200,25 @@ namespace Raytracing {
PRECISION_TYPE a, f, u, v; PRECISION_TYPE a, f, u, v;
edge1 = (theTriangle.vertex2 + position) - (theTriangle.vertex1 + position); edge1 = (theTriangle.vertex2 + position) - (theTriangle.vertex1 + position);
edge2 = (theTriangle.vertex3 + position) - (theTriangle.vertex1 + position); edge2 = (theTriangle.vertex3 + position) - (theTriangle.vertex1 + position);
h = Vec4::cross(ray.getDirection(), edge2); h = Vec4::cross(ray.getDirection(), edge2);
a = Vec4::dot(edge1, h); a = Vec4::dot(edge1, h);
if (a > -EPSILON && a < EPSILON) if (a > -EPSILON && a < EPSILON)
return {false, Vec4(), Vec4(), 0}; //parallel to triangle return {false, Vec4(), Vec4(), 0}; //parallel to triangle
f = 1.0 / a; f = 1.0 / a;
s = ray.getStartingPoint() - (theTriangle.vertex1 + position); s = ray.getStartingPoint() - (theTriangle.vertex1 + position);
u = f * Vec4::dot(s, h); u = f * Vec4::dot(s, h);
if (u < 0.0 || u > 1.0) if (u < 0.0 || u > 1.0)
return {false, Vec4(), Vec4(), 0}; return {false, Vec4(), Vec4(), 0};
q = Vec4::cross(s, edge1); q = Vec4::cross(s, edge1);
v = f * Vec4::dot(ray.getDirection(), q); v = f * Vec4::dot(ray.getDirection(), q);
if (v < 0.0 || u + v > 1.0) if (v < 0.0 || u + v > 1.0)
return {false, Vec4(), Vec4(), 0}; return {false, Vec4(), Vec4(), 0};
// At this stage we can compute t to find out where the intersection point is on the line. // At this stage we can compute t to find out where the intersection point is on the line.
PRECISION_TYPE t = f * Vec4::dot(edge2, q); PRECISION_TYPE t = f * Vec4::dot(edge2, q);
// keep t in reasonable bounds, ensuring we respect depth // keep t in reasonable bounds, ensuring we respect depth
@ -226,7 +226,7 @@ namespace Raytracing {
// ray intersects // ray intersects
Vec4 rayIntersectionPoint = ray.along(t); Vec4 rayIntersectionPoint = ray.along(t);
Vec4 normal; Vec4 normal;
// calculate triangle berry centric coords // calculate triangle berry centric coords
// first we need the vector that runs between the vertex and the intersection point for all three vertices // first we need the vector that runs between the vertex and the intersection point for all three vertices
// we must subtract the position of the triangle from the intersection point because this calc must happen in triangle space not world space. // we must subtract the position of the triangle from the intersection point because this calc must happen in triangle space not world space.
@ -234,7 +234,7 @@ namespace Raytracing {
auto vertex1ToIntersect = theTriangle.vertex1 - (rayIntersectionPoint - position); auto vertex1ToIntersect = theTriangle.vertex1 - (rayIntersectionPoint - position);
auto vertex2ToIntersect = theTriangle.vertex2 - (rayIntersectionPoint - position); auto vertex2ToIntersect = theTriangle.vertex2 - (rayIntersectionPoint - position);
auto vertex3ToIntersect = theTriangle.vertex3 - (rayIntersectionPoint - position); auto vertex3ToIntersect = theTriangle.vertex3 - (rayIntersectionPoint - position);
// the magnitude of the cross product of two vectors is double the area formed by the triangle of their intersection. // the magnitude of the cross product of two vectors is double the area formed by the triangle of their intersection.
auto fullAreaVec = Vec4::cross(theTriangle.vertex1 - theTriangle.vertex2, theTriangle.vertex1 - theTriangle.vertex3); auto fullAreaVec = Vec4::cross(theTriangle.vertex1 - theTriangle.vertex2, theTriangle.vertex1 - theTriangle.vertex3);
auto areaVert1Vec = Vec4::cross(vertex2ToIntersect, vertex3ToIntersect); auto areaVert1Vec = Vec4::cross(vertex2ToIntersect, vertex3ToIntersect);
@ -271,7 +271,7 @@ namespace Raytracing {
return {false, Vec4(), Vec4(), 0}; return {false, Vec4(), Vec4(), 0};
} }
HitData ModelObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const { HitData ModelObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const {
auto hResult = HitData{false, Vec4(), Vec4(), max}; auto hResult = HitData{false, Vec4(), Vec4(), max};
@ -280,7 +280,7 @@ namespace Raytracing {
// must check through all the triangles in the object // must check through all the triangles in the object
// respecting depth along the way // respecting depth along the way
// but reducing the max it can reach my the last longest vector length. // but reducing the max it can reach my the last longest vector length.
for (const auto& t : triangles) { for (const auto& t: triangles) {
auto cResult = checkIfTriangleGotHit(*t, position, ray, min, hResult.length); auto cResult = checkIfTriangleGotHit(*t, position, ray, min, hResult.length);
if (cResult.hit) if (cResult.hit)
hResult = cResult; hResult = cResult;

View File

@ -7,14 +7,17 @@
#include <config.h> #include <config.h>
#ifdef COMPILE_GUI #ifdef COMPILE_GUI
#define GLFW_EXPOSE_NATIVE_X11 #define GLFW_EXPOSE_NATIVE_X11
#define GLFW_EXPOSE_NATIVE_GLX #define GLFW_EXPOSE_NATIVE_GLX
#include <GLFW/glfw3.h>
#include <GLFW/glfw3native.h> #include <GLFW/glfw3.h>
#include <GLFW/glfw3native.h>
#endif #endif
#include <cstddef> #include <cstddef>
#include <utility> #include <utility>
#include "engine/math/vectors.h"
namespace Raytracing { namespace Raytracing {
@ -23,21 +26,23 @@ namespace Raytracing {
void OpenCL::init() { void OpenCL::init() {
openCl = std::make_shared<OpenCL>(0, 0); openCl = std::make_shared<OpenCL>(0, 0);
} }
OpenCL::OpenCL(int platformID, int deviceID): m_activePlatform(platformID) {
OpenCL::OpenCL(int platformID, int deviceID):
m_activePlatform(platformID) {
m_CL_ERR = CL_SUCCESS; m_CL_ERR = CL_SUCCESS;
m_numPlatforms = 0; m_numPlatforms = 0;
m_CL_ERR = clGetPlatformIDs(0, NULL, &m_numPlatforms ); m_CL_ERR = clGetPlatformIDs(0, NULL, &m_numPlatforms);
if (m_CL_ERR == CL_SUCCESS) if (m_CL_ERR == CL_SUCCESS)
dlog << "We found " << m_numPlatforms << " OpenCL Platforms.\n"; dlog << "We found " << m_numPlatforms << " OpenCL Platforms.\n";
else else
elog << "OpenCL Error! " << m_CL_ERR << "\n"; elog << "OpenCL Error! " << m_CL_ERR << "\n";
m_platformIDs = new cl_platform_id[m_numPlatforms]; m_platformIDs = new cl_platform_id[m_numPlatforms];
m_CL_ERR = clGetPlatformIDs(m_numPlatforms, m_platformIDs, &m_numOfPlatformIDs); m_CL_ERR = clGetPlatformIDs(m_numPlatforms, m_platformIDs, &m_numOfPlatformIDs);
m_CL_ERR = clGetDeviceIDs(m_platformIDs[platformID], CL_DEVICE_TYPE_GPU, 1, &m_deviceID, &m_numOfDevices); m_CL_ERR = clGetDeviceIDs(m_platformIDs[platformID], CL_DEVICE_TYPE_GPU, 1, &m_deviceID, &m_numOfDevices);
printDeviceInfo(m_deviceID); printDeviceInfo(m_deviceID);
cl_context_properties proper[3] = { cl_context_properties proper[3] = {
@ -46,7 +51,7 @@ namespace Raytracing {
}; };
m_context = clCreateContext(proper, 1, &m_deviceID, NULL, NULL, &m_CL_ERR); m_context = clCreateContext(proper, 1, &m_deviceID, NULL, NULL, &m_CL_ERR);
if (m_CL_ERR != CL_SUCCESS) if (m_CL_ERR != CL_SUCCESS)
elog << "OpenCL Error Creating Context! " << m_CL_ERR << "\n"; elog << "OpenCL Error Creating Context! " << m_CL_ERR << "\n";
@ -78,27 +83,31 @@ namespace Raytracing {
clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(dv), &dv, NULL); clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(dv), &dv, NULL);
dlog << "Opening OpenCL Device!\n"; dlog << "Opening OpenCL Device!\n";
dlog << "Device CL String " << dv.opencl_space << dv.major << dv.dot << dv.minor << dv.space << dv.vendor<< "\n"; dlog << "Device CL String " << dv.opencl_space << dv.major << dv.dot << dv.minor << dv.space << dv.vendor << "\n";
dlog << "Device Address Bits: " << deviceAddressBits << "\n"; dlog << "Device Address Bits: " << deviceAddressBits << "\n";
dlog << "Device is currently " << (deviceAvailable ? "available" : "unavailable") << "\n"; dlog << "Device is currently " << (deviceAvailable ? "available" : "unavailable") << "\n";
dlog << "Device has " << cacheSize/1024 << "kb of cache with a cache line width of " << cacheLineSize << " bytes\n"; dlog << "Device has " << cacheSize / 1024 << "kb of cache with a cache line width of " << cacheLineSize << " bytes\n";
dlog << "Device " << (textureSupport ? "has" : "doesn't have") << " texture support\n"; dlog << "Device " << (textureSupport ? "has" : "doesn't have") << " texture support\n";
dlog << "Device has " << maxWorkgroups << " max workgroup size.\n"; dlog << "Device has " << maxWorkgroups << " max workgroup size.\n";
dlog << "Device has " << m_computeUnits << " compute units running at a max clock frequency " << m_deviceClockFreq << "\n"; dlog << "Device has " << m_computeUnits << " compute units running at a max clock frequency " << m_deviceClockFreq << "\n";
if (!textureSupport) if (!textureSupport)
elog << "Warning! The OpenCL device lacks texture support!\n"; elog << "Warning! The OpenCL device lacks texture support!\n";
} }
void OpenCL::createCLProgram(CLProgram& program) { void OpenCL::createCLProgram(CLProgram& program) {
program.loadCLShader(openCl->m_context, openCl->m_deviceID); program.loadCLShader(openCl->m_context, openCl->m_deviceID);
} }
OpenCL::~OpenCL() { OpenCL::~OpenCL() {
delete[](m_platformIDs); delete[](m_platformIDs);
clReleaseDevice(m_deviceID); clReleaseDevice(m_deviceID);
clReleaseContext(m_context); clReleaseContext(m_context);
} }
cl_uint OpenCL::activeDeviceComputeUnits() { cl_uint OpenCL::activeDeviceComputeUnits() {
return openCl->m_computeUnits; return openCl->m_computeUnits;
} }
cl_uint OpenCL::activeDeviceFrequency() { cl_uint OpenCL::activeDeviceFrequency() {
return openCl->m_deviceClockFreq; return openCl->m_deviceClockFreq;
} }
@ -107,6 +116,7 @@ namespace Raytracing {
CLProgram::CLProgram(const std::string& file) { CLProgram::CLProgram(const std::string& file) {
m_source = ShaderLoader::loadShaderFile(file); m_source = ShaderLoader::loadShaderFile(file);
} }
void CLProgram::loadCLShader(cl_context context, cl_device_id deviceID) { void CLProgram::loadCLShader(cl_context context, cl_device_id deviceID) {
this->m_context = context; this->m_context = context;
this->m_deviceID = deviceID; this->m_deviceID = deviceID;
@ -118,49 +128,41 @@ namespace Raytracing {
if (m_CL_ERR != CL_SUCCESS) if (m_CL_ERR != CL_SUCCESS)
elog << "Unable to create CL program!\n"; elog << "Unable to create CL program!\n";
m_CL_ERR = clBuildProgram(m_program, 1, &deviceID, NULL, NULL, NULL); m_CL_ERR = clBuildProgram(m_program, 1, &deviceID, NULL, NULL, NULL);
if (m_CL_ERR != CL_SUCCESS) { if (m_CL_ERR != CL_SUCCESS) {
elog << "Unable to build CL program!\n"; elog << "Unable to build CL program!\n";
size_t len; size_t len;
clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
char buffer[len]; char buffer[len];
clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, len, buffer, NULL); clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, len, buffer, NULL);
elog << buffer << "\n"; elog << buffer << "\n";
} }
} }
/**
* Buffers are the quintessential datastructures in OpenCL. They are basically regions of memory allocated to a program.
* @param bufferName the name of the buffer used to store internally
* @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
* @param bytes the number of bytes to be allocated.
*/
void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes) { void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes) {
// create the buffer on the GPU // create the buffer on the GPU
cl_mem buff = clCreateBuffer(m_context, flags, bytes, NULL, &m_CL_ERR); cl_mem buff = clCreateBuffer(m_context, flags, bytes, NULL, &m_CL_ERR);
// then store it in our buffer map for easy access. // then store it in our buffer map for easy access.
buffers.insert({bufferName, buff}); buffers.insert({bufferName, buff});
} }
/**
* Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
* @param bufferName the name of the buffer used to store internally
* @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
* @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
* @param ptr the pointer to copy to the GPU.
*/
void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr) { void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr) {
// create the buffer on the GPU // create the buffer on the GPU
cl_mem buff = clCreateBuffer(m_context, CL_MEM_COPY_HOST_PTR | flags, bytes, ptr, &m_CL_ERR); cl_mem buff = clCreateBuffer(m_context, CL_MEM_COPY_HOST_PTR | flags, bytes, ptr, &m_CL_ERR);
// then store it in our buffer map for easy access. // then store it in our buffer map for easy access.
buffers.insert({bufferName, buff}); buffers.insert({bufferName, buff});
} }
void CLProgram::createImage(const std::string& imageName, int width, int height) { void CLProgram::createImage(const std::string& imageName, int width, int height) {
// create the texture on the GPU // create the texture on the GPU
cl_image_format format {CL_RGBA, CL_UNORM_INT8}; cl_image_format format{CL_RGBA, CL_UNORM_INT8};
cl_image_desc imageDesc; cl_image_desc imageDesc;
imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
imageDesc.image_width = width; imageDesc.image_width = width;
@ -171,10 +173,10 @@ namespace Raytracing {
imageDesc.num_samples = 0; imageDesc.num_samples = 0;
imageDesc.buffer = NULL; imageDesc.buffer = NULL;
cl_mem tex = clCreateImage(m_context, CL_MEM_READ_WRITE, &format, &imageDesc, NULL, &m_CL_ERR); cl_mem tex = clCreateImage(m_context, CL_MEM_READ_WRITE, &format, &imageDesc, NULL, &m_CL_ERR);
if (m_CL_ERR != CL_SUCCESS){ if (m_CL_ERR != CL_SUCCESS) {
elog << "Unable to create image texture!\n"; elog << "Unable to create image texture!\n";
checkBasicErrors(); checkBasicErrors();
switch (m_CL_ERR){ switch (m_CL_ERR) {
case CL_INVALID_VALUE: case CL_INVALID_VALUE:
elog << "\tFlags are not valid!\n"; elog << "\tFlags are not valid!\n";
// this is straight from the docs // this is straight from the docs
@ -208,63 +210,37 @@ namespace Raytracing {
// then store it in our buffer map for easy access. // then store it in our buffer map for easy access.
buffers.insert({imageName, tex}); buffers.insert({imageName, tex});
} }
/**
* Kernels are the entry points in OpenCL. You can have multiple of them in a single program.
* @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class.
*/
void CLProgram::createKernel(const std::string& kernelName) { void CLProgram::createKernel(const std::string& kernelName) {
auto kernel = clCreateKernel(m_program, kernelName.c_str(), &m_CL_ERR); auto kernel = clCreateKernel(m_program, kernelName.c_str(), &m_CL_ERR);
if (m_CL_ERR != CL_SUCCESS) if (m_CL_ERR != CL_SUCCESS)
elog << "Unable to create CL kernel " << kernelName << "!\n"; elog << "Unable to create CL kernel " << kernelName << "!\n";
kernels.insert({kernelName, kernel}); kernels.insert({kernelName, kernel});
} }
/**
* Allows you to bind certain buffers to a specific index in the kernel's argument list.
* @param kernel kernel to bind to
* @param buffer buffer to bind to argIndex
* @param argIndex the index of the argument for this buffer.
*/
void CLProgram::setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex) { void CLProgram::setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex) {
m_CL_ERR = clSetKernelArg(kernels[kernel], argIndex, sizeof(cl_mem), (void*) &buffers[buffer]); m_CL_ERR = clSetKernelArg(kernels[kernel], argIndex, sizeof(cl_mem), (void*) &buffers[buffer]);
if (m_CL_ERR != CL_SUCCESS) if (m_CL_ERR != CL_SUCCESS)
elog << "Unable to bind argument " << buffer << " to CL kernel " << kernel << "!\n"; elog << "Unable to bind argument " << buffer << " to CL kernel " << kernel << "!\n";
} }
/**
* Enqueues a write command to the buffer specified by the buffer name,
* @param buffer the buffer to write to
* @param bytes the number of bytes to be copied
* @param ptr the pointer to copy from. Must have at least bytes available
* @param blocking should this function wait for the bytes to be uploaded to the GPU?
* @param offset offset in the buffer object to write to
*/
void CLProgram::writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) { void CLProgram::writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) {
m_CL_ERR = clEnqueueWriteBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL); m_CL_ERR = clEnqueueWriteBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL);
if (m_CL_ERR != CL_SUCCESS) if (m_CL_ERR != CL_SUCCESS)
elog << "Unable to enqueue write to " << buffer << " buffer!\n"; elog << "Unable to enqueue write to " << buffer << " buffer!\n";
} }
/**
* Enqueues a read command from the buffered specified by the buffer name.
* Defaults to blocking but can be set to be non-blocking.
* @param buffer buffer to read from
* @param bytes the number of bytes to read. Make sure ptr has at least those bytes available.
* @param ptr the ptr to write the read bytes to.
* @param blocking should we wait for the read or do it async?
* @param offset offset in the buffer to read from.
*/
void CLProgram::readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) { void CLProgram::readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) {
m_CL_ERR = clEnqueueReadBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL); m_CL_ERR = clEnqueueReadBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL);
if (m_CL_ERR != CL_SUCCESS) if (m_CL_ERR != CL_SUCCESS)
elog << "Unable to enqueue read from " << buffer << " buffer!\n"; elog << "Unable to enqueue read from " << buffer << " buffer!\n";
} }
/**
* Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
*/
void CLProgram::flushCommands() { void CLProgram::flushCommands() {
clFlush(m_commandQueue); clFlush(m_commandQueue);
} }
/**
* Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
*/
void CLProgram::finishCommands() { void CLProgram::finishCommands() {
flushCommands(); flushCommands();
clFinish(m_commandQueue); clFinish(m_commandQueue);
@ -274,13 +250,14 @@ namespace Raytracing {
m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], 1, globalWorkOffset, &globalWorkSize, &localWorkSize, 0, NULL, NULL); m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], 1, globalWorkOffset, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
} }
void CLProgram::runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim, const size_t* globalWorkOffset) { void
CLProgram::runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim, const size_t* globalWorkOffset) {
m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, NULL, NULL); m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, NULL, NULL);
} }
void CLProgram::readImage(const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking, size_t x, size_t y) { void CLProgram::readImage(const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking, size_t x, size_t y) {
size_t origin[3] {x, y, 0}; size_t origin[3]{x, y, 0};
size_t region[3] {width, height, 1}; size_t region[3]{width, height, 1};
m_CL_ERR = clEnqueueReadImage(m_commandQueue, buffers[imageName], blocking, origin, region, 0, 0, ptr, 0, NULL, NULL); m_CL_ERR = clEnqueueReadImage(m_commandQueue, buffers[imageName], blocking, origin, region, 0, 0, ptr, 0, NULL, NULL);
if (m_CL_ERR != CL_SUCCESS) { if (m_CL_ERR != CL_SUCCESS) {
elog << "Unable to enqueue read from " << imageName << " image:\n"; elog << "Unable to enqueue read from " << imageName << " image:\n";
@ -298,15 +275,16 @@ namespace Raytracing {
CLProgram::~CLProgram() { CLProgram::~CLProgram() {
finishCommands(); finishCommands();
for (const auto& kernel : kernels) for (const auto& kernel: kernels)
clReleaseKernel(kernel.second); clReleaseKernel(kernel.second);
clReleaseProgram(m_program); clReleaseProgram(m_program);
for (const auto& buffer : buffers) for (const auto& buffer: buffers)
clReleaseMemObject(buffer.second); clReleaseMemObject(buffer.second);
clReleaseCommandQueue(m_commandQueue); clReleaseCommandQueue(m_commandQueue);
} }
void CLProgram::checkBasicErrors() const { void CLProgram::checkBasicErrors() const {
switch (m_CL_ERR){ switch (m_CL_ERR) {
case CL_OUT_OF_HOST_MEMORY: case CL_OUT_OF_HOST_MEMORY:
elog << "\tHost is out of memory!\n"; elog << "\tHost is out of memory!\n";
break; break;
@ -331,4 +309,23 @@ namespace Raytracing {
} }
} }
void CLProgram::readImage(const std::string& imageName, Image& image) {
// Create an array for copying bytes from the GPU to the CPU with
unsigned char bytes[image.getWidth() * image.getHeight() * 4];
for (int i = 0; i < image.getWidth() * image.getHeight() * 4; i++)
bytes[i] = 0;
// Call to OpenCL to read the image from the GPU and send it to the location of our buffer
this->readImage(imageName, image.getWidth(), image.getHeight(), bytes);
// multiplication is much faster than division,
// so to speed up the function precalculate the color multiplier used to convert from RGBA8 format to floating point format used by the raytracer.
const PRECISION_TYPE colorFactor = 1.0 / 255.0;
// copy the data from the buffer into the image by calculating its byte offset based on its pixel position.
for (int i = 0; i < image.getWidth(); i++) {
for (int j = 0; j < image.getHeight(); j++) {
const auto pixelData = bytes + (j * 4 * image.getWidth() + i * 4);
image.setPixelColor(i, j, {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor});
}
}
}
} }