diff --git a/Step 3/.idea/codeStyles/Project.xml b/Step 3/.idea/codeStyles/Project.xml
new file mode 100644
index 0000000..a7819ce
--- /dev/null
+++ b/Step 3/.idea/codeStyles/Project.xml	
@@ -0,0 +1,73 @@
+<component name="ProjectCodeStyleConfiguration">
+  <code_scheme name="Project" version="173">
+    <option name="RIGHT_MARGIN" value="150" />
+    <Objective-C>
+      <option name="INDENT_CLASS_MEMBERS" value="8" />
+      <option name="INDENT_VISIBILITY_KEYWORDS" value="4" />
+      <option name="INDENT_PREPROCESSOR_DIRECTIVE" value="4" />
+      <option name="FUNCTION_NON_TOP_AFTER_RETURN_TYPE_WRAP" value="0" />
+      <option name="FUNCTION_TOP_AFTER_RETURN_TYPE_WRAP" value="0" />
+      <option name="FUNCTION_PARAMETERS_NEW_LINE_AFTER_LPAR" value="true" />
+      <option name="FUNCTION_PARAMETERS_NEW_LINE_BEFORE_RPAR" value="true" />
+      <option name="LAMBDA_CAPTURE_LIST_ALIGN_MULTILINE" value="true" />
+      <option name="STRUCTURED_BINDING_LIST_ALIGN_MULTILINE" value="true" />
+      <option name="FUNCTION_CALL_ARGUMENTS_NEW_LINE_AFTER_LPAR" value="true" />
+      <option name="FUNCTION_CALL_ARGUMENTS_NEW_LINE_BEFORE_RPAR" value="true" />
+      <option name="CLASS_CONSTRUCTOR_INIT_LIST_NEW_LINE_BEFORE_COLON" value="0" />
+      <option name="CLASS_CONSTRUCTOR_INIT_LIST_NEW_LINE_AFTER_COLON" value="1" />
+      <option name="SUPERCLASS_LIST_BEFORE_COLON" value="0" />
+      <option name="SPACE_BEFORE_INIT_LIST_COLON" value="false" />
+      <option name="SPACE_BEFORE_POINTER_IN_DECLARATION" value="false" />
+      <option name="SPACE_AFTER_POINTER_IN_DECLARATION" value="true" />
+      <option name="SPACE_BEFORE_REFERENCE_IN_DECLARATION" value="false" />
+      <option name="SPACE_AFTER_REFERENCE_IN_DECLARATION" value="true" />
+    </Objective-C>
+    <Objective-C-extensions>
+      <rules>
+        <rule entity="NAMESPACE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="MACRO" visibility="ANY" specifier="ANY" prefix="" style="SCREAMING_SNAKE_CASE" suffix="" />
+        <rule entity="CLASS" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
+        <rule entity="STRUCT" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="ENUM" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="ENUMERATOR" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
+        <rule entity="TYPEDEF" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="UNION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="CLASS_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="STRUCT_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="CLASS_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="STRUCT_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="GLOBAL_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="GLOBAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="PARAMETER" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="LOCAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+      </rules>
+    </Objective-C-extensions>
+    <Objective-C-extensions>
+      <rules>
+        <rule entity="NAMESPACE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="MACRO" visibility="ANY" specifier="ANY" prefix="" style="SCREAMING_SNAKE_CASE" suffix="" />
+        <rule entity="CLASS" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
+        <rule entity="STRUCT" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="ENUM" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="ENUMERATOR" visibility="ANY" specifier="ANY" prefix="" style="PASCAL_CASE" suffix="" />
+        <rule entity="TYPEDEF" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="UNION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="CLASS_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="STRUCT_MEMBER_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="CLASS_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="STRUCT_MEMBER_FIELD" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="GLOBAL_FUNCTION" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="GLOBAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="PARAMETER" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+        <rule entity="LOCAL_VARIABLE" visibility="ANY" specifier="ANY" prefix="" style="CAMEL_CASE" suffix="" />
+      </rules>
+    </Objective-C-extensions>
+    <codeStyleSettings language="ObjectiveC">
+      <option name="RIGHT_MARGIN" value="150" />
+      <indentOptions>
+        <option name="LABEL_INDENT_SIZE" value="-4" />
+        <option name="KEEP_INDENTS_ON_EMPTY_LINES" value="true" />
+      </indentOptions>
+    </codeStyleSettings>
+  </code_scheme>
+</component>
\ No newline at end of file
diff --git a/Step 3/.idea/codeStyles/codeStyleConfig.xml b/Step 3/.idea/codeStyles/codeStyleConfig.xml
index a55e7a1..79ee123 100644
--- a/Step 3/.idea/codeStyles/codeStyleConfig.xml	
+++ b/Step 3/.idea/codeStyles/codeStyleConfig.xml	
@@ -1,5 +1,5 @@
 <component name="ProjectCodeStyleConfiguration">
   <state>
-    <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
+    <option name="USE_PER_PROJECT_SETTINGS" value="true" />
   </state>
 </component>
\ No newline at end of file
diff --git a/Step 3/.idea/copyright/GPL3.xml b/Step 3/.idea/copyright/GPL3.xml
new file mode 100644
index 0000000..b28474b
--- /dev/null
+++ b/Step 3/.idea/copyright/GPL3.xml	
@@ -0,0 +1,5 @@
+<component name="CopyrightManager">
+  <copyright>
+    <option name="myName" value="GPL3" />
+  </copyright>
+</component>
\ No newline at end of file
diff --git a/Step 3/.idea/scopes/GPL_3.xml b/Step 3/.idea/scopes/GPL_3.xml
new file mode 100644
index 0000000..f921103
--- /dev/null
+++ b/Step 3/.idea/scopes/GPL_3.xml	
@@ -0,0 +1,3 @@
+<component name="DependencyValidationManager">
+  <scope name="GPL_3" pattern="file[Step 2]:*/&amp;&amp;!file[Step 2]:.idea//*&amp;&amp;!file[Step 2]:Old//*&amp;&amp;!file[Step 2]:Old 2//*&amp;&amp;!file[Step 2]:resources//*&amp;&amp;!file:cmake_make_command.sh&amp;&amp;!file:CMakeLists.txt&amp;&amp;!file:Dev Log Week of 2022-10-23.odt&amp;&amp;!file:st3-rd-debug.cap&amp;&amp;!file:st3-rd-release.cap&amp;&amp;!file:Submission.zip" />
+</component>
\ No newline at end of file
diff --git a/Step 3/.idea/scopes/GPL_3_LOCAL.xml b/Step 3/.idea/scopes/GPL_3_LOCAL.xml
new file mode 100644
index 0000000..0da52bc
--- /dev/null
+++ b/Step 3/.idea/scopes/GPL_3_LOCAL.xml	
@@ -0,0 +1,3 @@
+<component name="DependencyValidationManager">
+  <scope name="GPL_3_LOCAL" pattern="file[Step 2]:*/&amp;&amp;!file[Step 2]:.idea//*&amp;&amp;!file[Step 2]:Old//*&amp;&amp;!file[Step 2]:Old 2//*&amp;&amp;!file[Step 2]:resources//*&amp;&amp;!file:cmake_make_command.sh&amp;&amp;!file:CMakeLists.txt&amp;&amp;!file:Dev Log Week of 2022-10-23.odt&amp;&amp;!file:st3-rd-debug.cap&amp;&amp;!file:st3-rd-release.cap&amp;&amp;!file:Submission.zip" />
+</component>
\ No newline at end of file
diff --git a/Step 3/include/engine/raytracing.h b/Step 3/include/engine/raytracing.h
index d39ffd8..e18290e 100644
--- a/Step 3/include/engine/raytracing.h	
+++ b/Step 3/include/engine/raytracing.h	
@@ -19,13 +19,13 @@
 #include <queue>
 
 namespace Raytracing {
-
+    
     class Camera {
         private:
             /* Image details */
             const Image image;
             const PRECISION_TYPE aspectRatio;
-
+            
             /* Camera details */
             PRECISION_TYPE viewportHeight;
             PRECISION_TYPE viewportWidth;
@@ -35,14 +35,14 @@ namespace Raytracing {
             const PRECISION_TYPE FAR_PLANE = 500;
             PRECISION_TYPE tanFovHalf;
             PRECISION_TYPE frustumLength;
-
+            
             Vec4 position{0, 0, 0};
             Vec4 horizontalAxis;
             Vec4 verticalAxis;
             Vec4 imageOrigin;
             
-            Vec4 up {0, 1, 0};
-            
+            Vec4 up{0, 1, 0};
+        
         public:
             Camera(PRECISION_TYPE fov, const Image& image): image(image),
                                                             aspectRatio(double(image.getWidth()) / double(image.getHeight())) {
@@ -58,20 +58,20 @@ namespace Raytracing {
                 verticalAxis = (Vec4{0, viewportHeight, 0, 0});
                 // lower left of the camera's view port. used to project our vectors from image space to world space
                 imageOrigin = (position - horizontalAxis / 2 - verticalAxis / 2 - Vec4(0, 0, focalLength, 0));
-
+                
                 tlog << viewportHeight << "\n";
                 tlog << viewportWidth << "\n";
                 tlog << "\n";
                 tlog << horizontalAxis << "\n";
                 tlog << verticalAxis << "\n";
                 tlog << imageOrigin << "\n";
-
+                
             }
-
+            
             Ray projectRay(PRECISION_TYPE x, PRECISION_TYPE y);
-
+            
             void setPosition(const Vec4& pos) { this->position = pos; }
-
+            
             void setRotation(PRECISION_TYPE yaw, PRECISION_TYPE pitch);
             
             // the follow utility functions are actually taking forever to get right
@@ -79,7 +79,7 @@ namespace Raytracing {
             // got to install GLM to test which function works and which does. Maybe they are both bad. or Maybe it's my matrix impl
             // or maybe the whole rendering stack sucks
             [[nodiscard]] Mat4x4 project() const {
-                Mat4x4 project {emptyMatrix};
+                Mat4x4 project{emptyMatrix};
                 
                 // this should be all it takes to create a mostly correct projection matrix
                 project.m00(float(1.0 / (aspectRatio * tanFovHalf)));
@@ -106,12 +106,12 @@ namespace Raytracing {
                 view.m01(float(w.y()));
                 view.m02(float(w.z()));
                 view.m03(float(w.w()));
-    
+                
                 view.m10(float(u.x()));
                 view.m11(float(u.y()));
                 view.m12(float(u.z()));
                 view.m13(float(u.w()));
-    
+                
                 view.m20(float(v.x()));
                 view.m21(float(v.y()));
                 view.m22(float(v.z()));
@@ -126,31 +126,31 @@ namespace Raytracing {
                 return view;
             }
             Mat4x4 view(PRECISION_TYPE yaw, PRECISION_TYPE pitch);
-
-            [[nodiscard]] inline Vec4 getPosition() const {return position;};
-        
+            
+            [[nodiscard]] inline Vec4 getPosition() const { return position; };
+            
             // the camera's position must be set with setPosition(Vec4);
             // uses an internal up vector, assumed to be {0, 1, 0}
             // will make the camera look at provided position with respects to the current camera position.
             void lookAt(const Vec4& lookAtPos);
     };
-
+    
     static Random rnd{-1.0, 1.0};
-
+    
     struct RaycasterImageBounds {
-        int width,height, x,y;
+        int width, height, x, y;
     };
-
-    class Raycaster {
+    
+    class RayCaster {
         private:
             int maxBounceDepth = 50;
             int raysPerPixel = 50;
-
+            
             Camera& camera;
             Image& image;
             World& world;
             
-            std::vector<std::unique_ptr<std::thread>> executors {};
+            std::vector<std::unique_ptr<std::thread>> executors{};
             // is the raytracer still running?
             bool stillRunning = true;
             unsigned int finishedThreads = 0;
@@ -159,17 +159,17 @@ namespace Raytracing {
             // and compared to the actual runtime of the raytracing it's very small!
             std::mutex queueSync;
             std::queue<RaycasterImageBounds>* unprocessedQuads = nullptr;
-
+            
             Vec4 raycasti(const Ray& ray, int depth);
             Vec4 raycast(const Ray& ray);
             void runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY);
             void setupQueue(const std::vector<RaycasterImageBounds>& bounds);
         public:
-            inline void updateRayInfo(int maxBounce, int perPixel){
+            inline void updateRayInfo(int maxBounce, int perPixel) {
                 raysPerPixel = perPixel;
                 maxBounceDepth = maxBounce;
             }
-            inline void resetRayInfo(){
+            inline void resetRayInfo() {
                 raysPerPixel = 50;
                 maxBounceDepth = 50;
             }
@@ -189,33 +189,33 @@ namespace Raytracing {
                 // the second creates better results but is 18% slower (better defined shadows)
                 // likely due to not over generating unit vectors biased towards the corners
             }
-            Raycaster(Camera& c, Image& i, World& world, const Parser& p): camera(c), image(i), world(world) {
+            RayCaster(Camera& c, Image& i, World& world, const Parser& p): camera(c), image(i), world(world) {
                 world.generateBVH();
             }
             void runSTDThread(int threads = -1);
             void runOpenMP(int threads = -1);
             void runMPI(std::queue<RaycasterImageBounds> bounds);
-            [[nodiscard]] inline bool areThreadsStillRunning() const {return finishedThreads == executors.size();}
-            inline void join(){
-                for (auto& p : executors)
+            [[nodiscard]] inline bool areThreadsStillRunning() const { return finishedThreads == executors.size(); }
+            inline void join() {
+                for (auto& p: executors)
                     p->join();
             }
-            void deleteThreads(){
-                for (auto& p : executors){
+            void deleteThreads() {
+                for (auto& p: executors) {
                     // wait for all threads to exit before trying to delete them.
                     try {
                         if (p->joinable())
                             p->join();
-                    } catch (std::exception& e){}
+                    } catch (std::exception& e) {}
                 }
                 // since executors contains the only reference to the unique_ptr it will be deleted automatically
                 executors.clear();
             }
-            ~Raycaster() {
+            ~RayCaster() {
                 deleteThreads();
-                delete(unprocessedQuads);
+                delete (unprocessedQuads);
             }
     };
-
+    
 }
 #endif //STEP_2_RAYTRACING_H
diff --git a/Step 3/include/graphics/graphics.h b/Step 3/include/graphics/graphics.h
index f1dd447..abcabb4 100644
--- a/Step 3/include/graphics/graphics.h	
+++ b/Step 3/include/graphics/graphics.h	
@@ -42,8 +42,8 @@ namespace Raytracing {
     
     void drawQuad();
     void deleteQuad();
-    
-    #ifdef USE_GLFW
+
+#ifdef USE_GLFW
     
     class XWindow {
         private:
@@ -52,7 +52,7 @@ namespace Raytracing {
             bool isCloseRequested = false;
             long lastFrameTime{};
             PRECISION_TYPE delta{};
-            PRECISION_TYPE frameTimeMs{},frameTimeS{};
+            PRECISION_TYPE frameTimeMs{}, frameTimeS{};
             PRECISION_TYPE fps{};
         public:
             XWindow(int width, int height);
@@ -62,21 +62,21 @@ namespace Raytracing {
             
             [[nodiscard]] inline bool shouldWindowClose() const { return isCloseRequested; }
             
-            [[nodiscard]] inline PRECISION_TYPE getFrameTimeMillis() const {return frameTimeMs;}
-            [[nodiscard]] inline PRECISION_TYPE getFrameTimeSeconds() const {return frameTimeS;}
-            [[nodiscard]] inline PRECISION_TYPE getFPS() const {return fps;}
+            [[nodiscard]] inline PRECISION_TYPE getFrameTimeMillis() const { return frameTimeMs; }
+            [[nodiscard]] inline PRECISION_TYPE getFrameTimeSeconds() const { return frameTimeS; }
+            [[nodiscard]] inline PRECISION_TYPE getFPS() const { return fps; }
             
             void setMouseGrabbed(bool grabbed);
             bool isMouseGrabbed();
-            [[nodiscard]] inline int displayWidth() const {return m_displayWidth;}
-            [[nodiscard]] inline int displayHeight() const {return m_displayHeight;}
-            [[nodiscard]] inline GLFWwindow* getWindow() const {return window;}
+            [[nodiscard]] inline int displayWidth() const { return m_displayWidth; }
+            [[nodiscard]] inline int displayHeight() const { return m_displayHeight; }
+            [[nodiscard]] inline GLFWwindow* getWindow() const { return window; }
             
             void closeWindow();
             ~XWindow();
     };
-    
-    #else
+
+#else
     class XWindow {
         private:
             // X11 display itself
@@ -125,7 +125,7 @@ namespace Raytracing {
             void closeWindow();
             ~XWindow();
     };
-    #endif
+#endif
     
     /**
      * The display renderer class handles all the major rendering events outside of window functions
@@ -138,7 +138,7 @@ namespace Raytracing {
             World& m_world;
             Shader& m_imageShader;
             Shader& m_worldShader;
-            Raycaster& m_raycaster;
+            RayCaster& m_raycaster;
             Parser& m_parser;
             Camera& m_camera;
         public:
@@ -147,7 +147,7 @@ namespace Raytracing {
                             World& world,
                             Shader& mImageShader,
                             Shader& mWorldShader,
-                            Raycaster& mRaycaster,
+                            RayCaster& mRaycaster,
                             Parser& mParser,
                             Camera& mCamera)
                     : m_window(mWindow), m_mainImage(mMainImage), m_imageShader(mImageShader), m_worldShader(mWorldShader), m_raycaster(mRaycaster),
diff --git a/Step 3/include/opencl/cl.h b/Step 3/include/opencl/cl.h
index 45986a6..43a6dc6 100644
--- a/Step 3/include/opencl/cl.h	
+++ b/Step 3/include/opencl/cl.h	
@@ -9,6 +9,7 @@
 // OpenCL includes
 #include <CL/cl.h>
 #include <CL/cl_gl.h>
+#include <engine/image/image.h>
 #include <config.h>
 
 #ifdef COMPILE_GUI
@@ -34,26 +35,146 @@ namespace Raytracing {
             std::unordered_map<std::string, cl_mem> buffers;
             std::unordered_map<std::string, cl_kernel> kernels;
             
+            /**
+             * Checks for some basic errors after calling OpenCL commands. Stuff like GPU out of memory... etc.
+             */
             void checkBasicErrors() const;
+        
         public:
+            /**
+             * Loads the shader from a file on class creation
+             * @param file file to load OpenCL "shader" (code) file
+             */
             explicit CLProgram(const std::string& file);
+            
+            /**
+             * Used by the OpenCL class to create a basic OpenCL program
+             * @param context provided by the OpenCL class.
+             * @param deviceID provided by the OpenCL class.
+             */
             void loadCLShader(cl_context context, cl_device_id deviceID);
             
+            /**
+             * Kernels are the entry points in OpenCL. You can have multiple of them in a single program.
+             * @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class.
+             */
             void createKernel(const std::string& kernelName);
+            
+            
+            /**
+             * Buffers are the quintessential datastructures in OpenCL. They are basically regions of memory allocated to a program.
+             * @param bufferName the name of the buffer used to store internally
+             * @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
+             * @param bytes the number of bytes to be allocated.
+             */
             void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes);
+            
+            /**
+             * Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
+             * @param bufferName the name of the buffer used to store internally
+             * @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
+             * @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
+             * @param ptr the pointer to copy to the GPU.
+             */
             void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr);
+            
+            /**
+             * Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
+             * @param bufferName the name of the buffer used to store internally
+             * @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
+             * @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
+             * @param ptr the pointer to copy to the GPU.
+             */
             void createImage(const std::string& imageName, int width, int height);
             
+            /**
+             * Allows you to bind certain buffers to a specific index in the kernel's argument list.
+             * @param kernel kernel to bind to
+             * @param buffer buffer to bind to argIndex
+             * @param argIndex the index of the argument for this buffer.
+             */
             void setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex);
             
+            /**
+             * Runs the kernel code on the GPU. Is blocking.
+             * @param kernel kernel function name to call
+             * @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id
+             * @param localWorkSize how many work items make up a work group to be executed by a kernel. 64 is recommended, must not exceed the printed value "device max workgroup size"
+             * @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id
+             */
             void runKernel(const std::string& kernel, size_t globalWorkSize, size_t localWorkSize, const size_t* globalWorkOffset = NULL);
-            void runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1, const size_t* globalWorkOffset = NULL);
             
+            
+            /**
+             * Runs the kernel code on the GPU. Is blocking.
+             * This version allows you to specify the number of work dimensions.
+             * globalWorkSize and localWorkSize must be an array of workDim size which specify the work size for each kernel
+             * For example a work dim of 2 allows for two separate work sizes to be set per dimension.
+             * An image is two dimensional and so global work size would be {width of image, height of image}
+             * and local work size would be {8, 8} for a total of 64 (again recommended). Alternatively specify CL_D2_64_LOCAL_SIZE
+             * The resulting execution causes get_global_id(0) to run [0, width) times and get_global_id(1) to run [0, height) times
+             * @param kernel kernel function name to call
+             * @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id(dim)
+             * @param localWorkSize how many work items make up a work group to be executed by a kernel. total 64 is recommended, total must not exceed the printed value "device max workgroup size"
+             * @param workDim number of dimensions to the work group being executed.
+             * @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id
+             */
+            void runKernel(
+                    const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1,
+                    const size_t* globalWorkOffset = NULL
+            );
+            
+            /**
+             * Enqueues a write command to the buffer specified by the buffer name,
+             * @param buffer the buffer to write to
+             * @param bytes the number of bytes to be copied
+             * @param ptr the pointer to copy from. Must have at least bytes available
+             * @param blocking should this function wait for the bytes to be uploaded to the GPU?
+             * @param offset offset in the buffer object to write to
+             */
             void writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
-            void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
-            void readImage(const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0);
             
+            /**
+             * Enqueues a read command from the buffered specified by the buffer name.
+             * Defaults to blocking but can be set to be non-blocking.
+             * @param buffer buffer to read from
+             * @param bytes the number of bytes to read. Make sure ptr has at least those bytes available.
+             * @param ptr the ptr to write the read bytes to.
+             * @param blocking should we wait for the read or do it async?
+             * @param offset offset in the buffer to read from.
+             */
+            void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
+            
+            /**
+             * Reads an image from the GPU into the memory region specified. Allocated memory region must be large enough to hold the image.
+             * @param imageName name of the buffer to read from
+             * @param width width of the image. Must be less than or equal to the width of the image on the GPU
+             * @param height height of the image. Also must be less than or equal to the height of the image on the GPU
+             * @param ptr pointer to the memory region to read into
+             * @param blocking should we wait for the read operation to complete? Defaults to yes.
+             * @param x x coordinate to start the read from. Defaults to zero since it's unlikely to be needed here. Included for possible future use.
+             * @param y y coordinate to start the read from.
+             */
+            void readImage(
+                    const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0
+            );
+            
+            /**
+             * Reads an image buffer into a RayCasting Image class.
+             * Image supplied must have a with and height that matches the width and height of the image buffer specified by the name.
+             * @param imageName name of the buffer you wish to read from
+             * @param image reference to an image that you want the GPU data read into.
+             */
+            void readImage(const std::string& imageName, Image& image);
+            
+            /**
+             * Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
+             */
             void flushCommands();
+            
+            /**
+             * Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
+             */
             void finishCommands();
             
             ~CLProgram();
@@ -76,17 +197,43 @@ namespace Raytracing {
             
             cl_context m_context;
             
+            /**
+             * prints out the important info about the specified device.
+             * @param device device to data dump
+             */
             void printDeviceInfo(cl_device_id device);
-        public:
-            explicit OpenCL(int platformID = 0, int deviceID = 0);
-            static void init();
-            static void createCLProgram(CLProgram& program);
-            static cl_uint activeDeviceComputeUnits();
-            static cl_uint activeDeviceFrequency();
         
+        public:
+            /**
+             * creates an opencl instance on the specified platform and device. Defaults to the first GPU device
+             */
+            explicit OpenCL(int platformID = 0, int deviceID = 0);
+            
+            /**
+             * Creates the global OpenCL instance for the engine
+             */
+            static void init();
+            
+            /**
+             * Creates an OpenCL program object using the global OpenCL connection
+             * @param program
+             */
+            static void createCLProgram(CLProgram& program);
+            
+            /**
+             * @return the number of compute units the device has
+             */
+            static cl_uint activeDeviceComputeUnits();
+            
+            /**
+             * the frequency in megahertz of the device
+             * @return
+             */
+            static cl_uint activeDeviceFrequency();
+            
             ~OpenCL();
     };
-
+    
 }
 
 #endif //STEP_3_CL_H
diff --git a/Step 3/src/engine/main.cpp b/Step 3/src/engine/main.cpp
index 8ddd394..31ab184 100644
--- a/Step 3/src/engine/main.cpp	
+++ b/Step 3/src/engine/main.cpp	
@@ -9,24 +9,24 @@
 #include <csignal>
 
 #ifdef USE_MPI
-    
-    #include <engine/mpi.h>
+
+#include <engine/mpi.h>
 
 #endif
 //#include <sys/time.h>
 //#include <sys/resource.h>
 
 #ifdef COMPILE_GUI
-    
-    #include <graphics/graphics.h>
-    #include <graphics/gl/gl.h>
-    #include <graphics/gl/shader.h>
+
+#include <graphics/graphics.h>
+#include <graphics/gl/gl.h>
+#include <graphics/gl/shader.h>
 
 #endif
 
 #ifdef COMPILE_OPENCL
-    
-    #include <opencl/cl.h>
+
+#include <opencl/cl.h>
 
 #endif
 
@@ -81,10 +81,10 @@ int main(int argc, char** args) {
                                     "\tSets the directory where the resources are stored.\n"
                                     "\tThis can be relative.Must have trailing '/' \n", "../resources/");
     parser.addOption("--mpi", "Use OpenMPI\n"
-                                    "\tTells the raycaster to use OpenMPI to run the raycaster algorithm\n");
+                              "\tTells the raycaster to use OpenMPI to run the raycaster algorithm\n");
     parser.addOption("--openmp", "Use OpenMP\n"
-                              "\tTells the raycaster to use OpenMP to run the raycaster algorithm\n");
-
+                                 "\tTells the raycaster to use OpenMP to run the raycaster algorithm\n");
+    
     // disabled because don't currently have a way to parse vectors. TODO
     //parser.addOption("--position", "Camera Position\n\tSets the position used to render the scene with the camera.\n", "{0, 0, 0}");
     
@@ -111,22 +111,22 @@ int main(int argc, char** args) {
     tlog << "Parsing complete! Starting raytracer with options:" << std::endl;
     // not perfect (contains duplicates) but good enough.
     parser.printAllInInfo();
-    
-    #ifdef USE_MPI
-        Raytracing::MPI::init(argc, args);
-    #endif
-    
-    #ifdef COMPILE_GUI
-        XWindow* window;
-        if (parser.hasOption("--gui") || parser.hasOption("-g"))
-            window = new XWindow(1440, 720);
-        Shader worldShader("../resources/shaders/world.vs", "../resources/shaders/world.fs");
-    #endif
-    
-    #ifdef COMPILE_OPENCL
+
+#ifdef USE_MPI
+    Raytracing::MPI::init(argc, args);
+#endif
+
+#ifdef COMPILE_GUI
+    XWindow* window;
+    if (parser.hasOption("--gui") || parser.hasOption("-g"))
+        window = new XWindow(1440, 720);
+    Shader worldShader("../resources/shaders/world.vs", "../resources/shaders/world.fs");
+#endif
+
+#ifdef COMPILE_OPENCL
     OpenCL::init();
-    
-    #endif
+
+#endif
     
     Raytracing::Image image(1440, 720);
     //Raytracing::Image image(std::stoi(parser.getOptionValue("-w")), std::stoi(parser.getOptionValue("-h")));
@@ -135,13 +135,13 @@ int main(int argc, char** args) {
     //camera.setPosition({0, 0, 1});
     camera.setPosition({6, 5, 6});
     camera.lookAt({0, 0, 0});
-    
-    
-    #ifdef COMPILE_GUI
+
+
+#ifdef COMPILE_GUI
     WorldConfig worldConfig{worldShader};
-    #else
+#else
     WorldConfig worldConfig;
-    #endif
+#endif
     worldConfig.useBVH = true;
     
     Raytracing::World world{worldConfig};
@@ -175,10 +175,10 @@ int main(int argc, char** args) {
     //world.add(new Raytracing::ModelObject({0, 0, 0}, debugCube, world.getMaterial("cat")));
     
     if (parser.hasOption("--gui") || parser.hasOption("-g")) {
-        #ifdef COMPILE_GUI
-        Raytracing::Raycaster raycaster{camera, image, world, parser};
+#ifdef COMPILE_GUI
+        Raytracing::RayCaster rayCaster{camera, image, world, parser};
         Texture mainImage(&image);
-    
+        
         CLProgram program(parser.getOptionValue("--resources") + "opencl/image.cl");
         OpenCL::createCLProgram(program);
         program.createKernel("drawImage");
@@ -186,52 +186,42 @@ int main(int argc, char** args) {
         program.setKernelArgument("drawImage", "mainImage", 0);
         program.setKernelArgument("drawImage", "mainImage", 1);
         
-        size_t works[2] {(size_t) image.getWidth(), (size_t) image.getHeight()};
-        size_t localWorks[2] {8, 8};
+        size_t works[2]{(size_t) image.getWidth(), (size_t) image.getHeight()};
+        size_t localWorks[2]{8, 8};
         
-        unsigned char bytes[image.getWidth() * image.getHeight() * 4];
-        for (int i = 0; i < image.getWidth() * image.getHeight() * 4; i++)
-            bytes[i] = 0;
         
         Shader shader("../resources/shaders/basic.vs", "../resources/shaders/basic.fs");
-        Raytracing::DisplayRenderer renderer{*window, mainImage, world, shader, worldShader, raycaster, parser, camera};
+        Raytracing::DisplayRenderer renderer{*window, mainImage, world, shader, worldShader, rayCaster, parser, camera};
         while (!window->shouldWindowClose()) {
             window->beginUpdate();
             renderer.draw();
             program.runKernel("drawImage", works, localWorks, 2);
-            program.readImage("mainImage", image.getWidth(), image.getHeight(), bytes);
-            const PRECISION_TYPE colorFactor = 1.0 / 255.0;
-            for (int i = 0; i < image.getWidth(); i++){
-                for (int j = 0; j < image.getHeight(); j++){
-                    const auto pixelData = bytes + (j * 4 * image.getWidth() + i * 4);
-                    //tlog << (int)pixelData[0] << " " << (int)pixelData[1] << " " << (int)pixelData[2] << "\n";
-                    image.setPixelColor(i, j, {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor});
-                }
-            }
+            program.readImage("mainImage", image);
+            
             glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
             world.drawBVH(worldShader);
             glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
             window->endUpdate();
         }
         RTSignal->haltExecution = true;
-        raycaster.join();
-        #else
+        rayCaster.join();
+#else
         flog << "Program not compiled with GUI support! Unable to open GUI\n";
-        #endif
+#endif
     } else {
-        Raytracing::Raycaster rayCaster{camera, image, world, parser};
+        Raytracing::RayCaster rayCaster{camera, image, world, parser};
         ilog << "Running RayCaster (NO_GUI)!\n";
         // we don't actually have to check for --single since it's implied to be default true.
         int threads = std::stoi(parser.getOptionValue("--threads"));
         if (parser.hasOption("--mpi")) {
             // We need to make sure that if the user requests that MPI be run while not having MPI compiled, they get a helpful error warning.
-            #ifdef USE_MPI
+#ifdef USE_MPI
             rayCaster.runMPI(Raytracing::MPI::getCurrentImageRegionAssociation(rayCaster));
-            #else
+#else
             flog << "Unable to run with MPI, CMake not set to compile MPI!\n";
             return 33;
-            #endif
-        } else if(parser.hasOption("--openmp")){
+#endif
+        } else if (parser.hasOption("--openmp")) {
             rayCaster.runOpenMP(threads);
         } else {
             rayCaster.runSTDThread(threads);
@@ -258,39 +248,39 @@ int main(int argc, char** args) {
             image.fromArray(buffer, doubleSize, i);
         }
 #endif
-        // write the image to the file
-        Raytracing::ImageOutput imageOutput(image);
-
-        auto t = std::time(nullptr);
-        auto now = std::localtime(&t);
-        std::stringstream timeString;
-        timeString << (1900 + now->tm_year);
-        timeString << "-";
-        timeString << (1 + now->tm_mon);
-        timeString << "-";
-        timeString << now->tm_mday;
-        timeString << " ";
-        timeString << now->tm_hour;
-        timeString << ":";
-        timeString << now->tm_min;
-        timeString << ":";
-        timeString << now->tm_sec;
-        ilog << "Writing Image!\n";
-        imageOutput.write(parser.getOptionValue("--output") + timeString.str(), parser.getOptionValue("--format"));
+    // write the image to the file
+    Raytracing::ImageOutput imageOutput(image);
+    
+    auto t = std::time(nullptr);
+    auto now = std::localtime(&t);
+    std::stringstream timeString;
+    timeString << (1900 + now->tm_year);
+    timeString << "-";
+    timeString << (1 + now->tm_mon);
+    timeString << "-";
+    timeString << now->tm_mday;
+    timeString << " ";
+    timeString << now->tm_hour;
+    timeString << ":";
+    timeString << now->tm_min;
+    timeString << ":";
+    timeString << now->tm_sec;
+    ilog << "Writing Image!\n";
+    imageOutput.write(parser.getOptionValue("--output") + timeString.str(), parser.getOptionValue("--format"));
 #ifdef USE_MPI
     }
     // wait for all processes to finish sending and receiving before we exit all of them.
     MPI_Barrier(MPI_COMM_WORLD);
 #endif
-
+    
     delete (RTSignal);
-    #ifdef COMPILE_GUI
+#ifdef COMPILE_GUI
     deleteQuad();
-    #endif
-    #ifdef USE_MPI
+#endif
+#ifdef USE_MPI
     MPI_Finalize();
-    #endif
-
+#endif
+    
     tlog << "Goodbye!\n";
     return 0;
 }
diff --git a/Step 3/src/engine/raytracing.cpp b/Step 3/src/engine/raytracing.cpp
index 7cb8728..cca1eb6 100644
--- a/Step 3/src/engine/raytracing.cpp	
+++ b/Step 3/src/engine/raytracing.cpp	
@@ -44,21 +44,21 @@ namespace Raytracing {
     void Camera::setRotation(const PRECISION_TYPE yaw, const PRECISION_TYPE pitch) {
         // TODO:
     }
-    Mat4x4 Camera::view(PRECISION_TYPE yaw, PRECISION_TYPE pitch)  {
+    Mat4x4 Camera::view(PRECISION_TYPE yaw, PRECISION_TYPE pitch) {
         Mat4x4 view;
-    
+        
         pitch = degreeeToRadian(pitch);
         yaw = degreeeToRadian(yaw);
-    
+        
         PRECISION_TYPE cosPitch = std::cos(pitch);
         PRECISION_TYPE cosYaw = std::cos(yaw);
         PRECISION_TYPE sinPitch = std::sin(pitch);
         PRECISION_TYPE sinYaw = std::sin(yaw);
-    
+        
         auto x = Vec4{cosYaw, 0, -sinYaw}; // forward
         auto y = Vec4{sinYaw * sinPitch, cosPitch, cosYaw * sinPitch}; // right
         auto z = Vec4{sinYaw * cosPitch, -sinPitch, cosPitch * cosYaw}; // up
-    
+        
         // we can actually take those x, y, z vectors and use them to compute the raytracer camera settings
         viewportHeight = 2 * tanFovHalf;
         viewportWidth = aspectRatio * viewportHeight;
@@ -71,23 +71,23 @@ namespace Raytracing {
         view.m01(float(x.y()));
         view.m02(float(x.z()));
         view.m03(float(x.w()));
-    
+        
         view.m10(float(y.x()));
         view.m11(float(y.y()));
         view.m12(float(y.z()));
         view.m13(float(y.w()));
-    
+        
         view.m20(float(z.x()));
         view.m21(float(z.y()));
         view.m22(float(z.z()));
         view.m23(float(z.w()));
-    
+        
         // view matrix are inverted, dot product to simulate translate matrix multiplication
         view.m03(-float(Vec4::dot(x, position)));
         view.m13(-float(Vec4::dot(y, position)));
         view.m23(-float(Vec4::dot(z, position)));
         view.m33(1);
-    
+        
         return view;
     }
     
@@ -97,14 +97,14 @@ namespace Raytracing {
         Vec4 color;
     };
     
-    Vec4 Raycaster::raycasti(const Ray& ray, int depth){
+    Vec4 RayCaster::raycasti(const Ray& ray, int depth) {
         return {};
     }
     
-    Vec4 Raycaster::raycast(const Ray& ray) {
+    Vec4 RayCaster::raycast(const Ray& ray) {
         Ray localRay = ray;
-        Vec4 color {1.0, 1.0, 1.0};
-        for (int CURRENT_BOUNCE = 0; CURRENT_BOUNCE < maxBounceDepth; CURRENT_BOUNCE++){
+        Vec4 color{1.0, 1.0, 1.0};
+        for (int CURRENT_BOUNCE = 0; CURRENT_BOUNCE < maxBounceDepth; CURRENT_BOUNCE++) {
             if (RTSignal->haltExecution || RTSignal->haltRaytracing)
                 return color;
             while (RTSignal->pauseRaytracing) // sleep for 1/60th of a second, or about 1 frame.
@@ -138,8 +138,8 @@ namespace Raytracing {
         
         return color;
     }
-
-    void Raycaster::runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY) {
+    
+    void RayCaster::runRaycastingAlgorithm(RaycasterImageBounds imageBounds, int loopX, int loopY) {
         try {
             int x = imageBounds.x + loopX;
             int y = imageBounds.y + loopY;
@@ -160,9 +160,9 @@ namespace Raytracing {
             flog << error.what() << "\n";
         }
     }
-
-
-    void Raycaster::runSTDThread(int threads){
+    
+    
+    void RayCaster::runSTDThread(int threads) {
         setupQueue(partitionScreen(threads));
         ilog << "Running std::thread\n";
         for (int i = 0; i < threads; i++) {
@@ -170,7 +170,7 @@ namespace Raytracing {
                 // run through all the quadrants
                 std::stringstream str;
                 str << "Threading of #";
-                str << (i+1);
+                str << (i + 1);
                 profiler::start("Raytracer Results", str.str());
                 while (unprocessedQuads != nullptr) {
                     RaycasterImageBounds imageBoundingData{};
@@ -195,12 +195,12 @@ namespace Raytracing {
             }));
         }
     }
-
-    void Raycaster::runOpenMP(int threads){
+    
+    void RayCaster::runOpenMP(int threads) {
         setupQueue(partitionScreen(threads));
-        #ifdef USE_OPENMP
+#ifdef USE_OPENMP
         ilog << "Running OpenMP\n";
-        #pragma omp parallel num_threads(threads+1) default(none) shared(threads)
+    #pragma omp parallel num_threads(threads+1) default(none) shared(threads)
         {
             int threadID = omp_get_thread_num();
             // an attempt at making the omp command non-blocking.
@@ -214,7 +214,7 @@ namespace Raytracing {
                 bool running = true;
                 while (running) {
                     RaycasterImageBounds imageBoundingData{};
-                    #pragma omp critical
+#pragma omp critical
                     {
                         if (unprocessedQuads->empty())
                             running = false;
@@ -231,18 +231,18 @@ namespace Raytracing {
                         }
                     }
                 }
-                #pragma omp critical
+#pragma omp critical
                 finishedThreads++;
                 profiler::end("Raytracer Results", str.str());
             }
         }
         tlog << "OpenMP finished!\n";
-        #else
-            flog << "Not compiled with OpenMP! Unable to run raytracing.\n";
-            system_threads;
-        #endif
+#else
+        flog << "Not compiled with OpenMP! Unable to run raytracing.\n";
+        system_threads;
+#endif
     }
-    void Raycaster::runMPI(std::queue<RaycasterImageBounds> bounds){
+    void RayCaster::runMPI(std::queue<RaycasterImageBounds> bounds) {
         ilog << "Running MPI\n";
         dlog << "We have " << bounds.size() << " bounds currently pending!\n";
         while (!bounds.empty()) {
@@ -254,17 +254,17 @@ namespace Raytracing {
             }
             bounds.pop();
         }
-        #ifdef USE_MPI
+#ifdef USE_MPI
         dlog << "Finished running MPI on " << currentProcessID << "\n";
-        #endif
+#endif
     }
-
-    std::vector<RaycasterImageBounds> Raycaster::partitionScreen(int threads) {
+    
+    std::vector<RaycasterImageBounds> RayCaster::partitionScreen(int threads) {
         // if we are running single threaded, disable everything special
         // the reason we run single threaded in a seperate thread is because the GUI requires its own set of updating commands
         // which cannot be blocked by the raytracer, otherwise it would become unresponsive.
         int divs = 1;
-        if (threads < 0 || threads == 1){
+        if (threads < 0 || threads == 1) {
             threads = 1;
             divs = 1;
         } else {
@@ -275,30 +275,30 @@ namespace Raytracing {
             // to do it without a queue like this leads to most threads finishing and a single thread being the critical path which isn't optimally efficient.
             divs = int(std::log(threads) / std::log(2)) * 4;
         }
-
+        
         ilog << "Generating multithreaded raytracer with " << threads << " threads and " << divs << " divisions! \n";
-
+        
         std::vector<RaycasterImageBounds> bounds;
-
+        
         // we need to subdivide the image for the threads, since this is really quick it's fine to due sequentially
         for (int dx = 0; dx < divs; dx++) {
             for (int dy = 0; dy < divs; dy++) {
                 bounds.push_back({
-                                               image.getWidth() / divs,
-                                               image.getHeight() / divs,
-                                               (image.getWidth() / divs) * dx,
-                                               (image.getHeight() / divs) * dy
-                                       });
+                                         image.getWidth() / divs,
+                                         image.getHeight() / divs,
+                                         (image.getWidth() / divs) * dx,
+                                         (image.getHeight() / divs) * dy
+                                 });
             }
         }
         return bounds;
     }
-
-    void Raycaster::setupQueue(const std::vector<RaycasterImageBounds>& bounds) {
-        delete(unprocessedQuads);
+    
+    void RayCaster::setupQueue(const std::vector<RaycasterImageBounds>& bounds) {
+        delete (unprocessedQuads);
         unprocessedQuads = new std::queue<RaycasterImageBounds>();
-        for (auto& b : bounds)
+        for (auto& b: bounds)
             unprocessedQuads->push(b);
     }
-
+    
 }
diff --git a/Step 3/src/engine/world.cpp b/Step 3/src/engine/world.cpp
index 23f738e..bc86edd 100644
--- a/Step 3/src/engine/world.cpp	
+++ b/Step 3/src/engine/world.cpp	
@@ -7,7 +7,7 @@
 #include "engine/image/stb_image.h"
 
 namespace Raytracing {
-
+    
     World::~World() {
         for (auto* p: objects)
             delete (p);
@@ -15,7 +15,7 @@ namespace Raytracing {
             delete (p.second);
         //delete(bvhObjects);
     }
-
+    
     HitData SphereObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const {
         PRECISION_TYPE radiusSquared = radius * radius;
         // move the ray to be with respects to the sphere
@@ -28,11 +28,11 @@ namespace Raytracing {
         // = 0: the ray has one root, we hit the edge of the sphere
         // < 0: ray isn't inside the sphere.
         PRECISION_TYPE discriminant = b * b - (a * c);
-
+        
         // < 0: ray isn't inside the sphere. Don't need to bother calculating the roots.
         if (discriminant < 0)
             return {false, Vec4(), Vec4(), 0};
-
+        
         // now we have to find the root which exists inside our range [min,max]
         auto root = (-b - std::sqrt(discriminant)) / a;
         // if the first root isn't in our range
@@ -48,7 +48,7 @@ namespace Raytracing {
         auto RayAtRoot = ray.along(root);
         // The normal of a sphere is just the point of the hit minus the center position
         auto normal = (RayAtRoot - position) / radius;
-
+        
         /*if (Raytracing::vec4::dot(ray.getDirection(), normal) > 0.0) {
             tlog << "ray inside sphere\n";
         } else
@@ -60,16 +60,16 @@ namespace Raytracing {
         // have to invert the v since we have to invert the v again later due to triangles
         return {true, RayAtRoot, normal, root, u, 1.0 - v};
     }
-
+    
     std::pair<HitData, Object*> World::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const {
         // actually speeds up rendering by about 110,000ms (total across 16 threads)
-        if (bvhObjects != nullptr && m_config.useBVH){
+        if (bvhObjects != nullptr && m_config.useBVH) {
             auto hResult = HitData{false, Vec4(), Vec4(), max};
             Object* objPtr = nullptr;
             
             auto intersected = bvhObjects->rayAnyHitIntersect(ray, min, max);
             
-            for (const auto& ptr : intersected) {
+            for (const auto& ptr: intersected) {
                 auto cResult = ptr.ptr->checkIfHit(ray, min, hResult.length);
                 if (cResult.hit) {
                     hResult = cResult;
@@ -106,21 +106,21 @@ namespace Raytracing {
             return {hResult, objPtr};
         }
     }
-
+    
     void World::generateBVH() {
         bvhObjects = std::make_unique<BVHTree>(objects);
     }
-
+    
     ScatterResults DiffuseMaterial::scatter(const Ray& ray, const HitData& hitData) const {
-        Vec4 newRay = hitData.normal + Raytracing::Raycaster::randomUnitVector().normalize();
-
+        Vec4 newRay = hitData.normal + Raytracing::RayCaster::randomUnitVector().normalize();
+        
         // rays that are close to zero are liable to floating point precision errors
         if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON)
             newRay = hitData.normal;
-
+        
         return {true, Ray{hitData.hitPoint, newRay}, getBaseColor()};
     }
-
+    
     ScatterResults MetalMaterial::scatter(const Ray& ray, const HitData& hitData) const {
         // create a ray reflection
         Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal);
@@ -128,22 +128,22 @@ namespace Raytracing {
         bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0;
         return {shouldReflect, Ray{hitData.hitPoint, newRay}, getBaseColor()};
     }
-
+    
     ScatterResults BrushedMetalMaterial::scatter(const Ray& ray, const HitData& hitData) const {
         // create a ray reflection
         Vec4 newRay = reflect(ray.getDirection().normalize(), hitData.normal);
         // make sure our reflected ray is outside the sphere and doesn't point inwards
         bool shouldReflect = Vec4::dot(newRay, hitData.normal) > 0;
-        return {shouldReflect, Ray{hitData.hitPoint, newRay + Raycaster::randomUnitVector() * fuzzyness}, getBaseColor()};
+        return {shouldReflect, Ray{hitData.hitPoint, newRay + RayCaster::randomUnitVector() * fuzzyness}, getBaseColor()};
     }
     
     ScatterResults TexturedMaterial::scatter(const Ray& ray, const HitData& hitData) const {
-        Vec4 newRay = hitData.normal + Raytracing::Raycaster::randomUnitVector().normalize();
-    
+        Vec4 newRay = hitData.normal + Raytracing::RayCaster::randomUnitVector().normalize();
+        
         // rays that are close to zero are liable to floating point precision errors
         if (newRay.x() < EPSILON && newRay.y() < EPSILON && newRay.z() < EPSILON && newRay.w() < EPSILON)
             newRay = hitData.normal;
-    
+        
         return {true, Ray{hitData.hitPoint, newRay}, getColor(hitData.u, hitData.v)};
     }
     Vec4 TexturedMaterial::getColor(PRECISION_TYPE u, PRECISION_TYPE v) const {
@@ -155,11 +155,11 @@ namespace Raytracing {
         // fix that pesky issue of the v being rotated 90* compared to the image
         v = 1.0 - clamp(v, 0.0, 1.0);
         
-        auto imageX = (int)(width * u);
-        auto imageY = (int)(height * v);
-    
-        if (imageX >= width)  imageX = width-1;
-        if (imageY >= height) imageY = height-1;
+        auto imageX = (int) (width * u);
+        auto imageY = (int) (height * v);
+        
+        if (imageX >= width) imageX = width - 1;
+        if (imageY >= height) imageY = height - 1;
         
         // since stbi loads in RGB8 [0, 255] but the engine works on [0, 1] we need to scale the data down.
         // this is best done with a single division followed by multiple multiplication.
@@ -169,7 +169,7 @@ namespace Raytracing {
         
         return {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor};
     }
-    TexturedMaterial::TexturedMaterial(const std::string& file) : Material({}) {
+    TexturedMaterial::TexturedMaterial(const std::string& file): Material({}) {
         // we are going to have to ignore transparency for now. TODO:?
         data = stbi_load(file.c_str(), &width, &height, &channels, 0);
         if (!data)
@@ -188,7 +188,7 @@ namespace Raytracing {
         return baseColor;
     }
     
-    PRECISION_TYPE sign(PRECISION_TYPE i){
+    PRECISION_TYPE sign(PRECISION_TYPE i) {
         return i >= 0 ? 1 : -1;
     }
     
@@ -200,25 +200,25 @@ namespace Raytracing {
         PRECISION_TYPE a, f, u, v;
         edge1 = (theTriangle.vertex2 + position) - (theTriangle.vertex1 + position);
         edge2 = (theTriangle.vertex3 + position) - (theTriangle.vertex1 + position);
-
+        
         h = Vec4::cross(ray.getDirection(), edge2);
         a = Vec4::dot(edge1, h);
-
+        
         if (a > -EPSILON && a < EPSILON)
             return {false, Vec4(), Vec4(), 0}; //parallel to triangle
-
+        
         f = 1.0 / a;
         s = ray.getStartingPoint() - (theTriangle.vertex1 + position);
         u = f * Vec4::dot(s, h);
-
+        
         if (u < 0.0 || u > 1.0)
             return {false, Vec4(), Vec4(), 0};
-
+        
         q = Vec4::cross(s, edge1);
         v = f * Vec4::dot(ray.getDirection(), q);
         if (v < 0.0 || u + v > 1.0)
             return {false, Vec4(), Vec4(), 0};
-
+        
         // At this stage we can compute t to find out where the intersection point is on the line.
         PRECISION_TYPE t = f * Vec4::dot(edge2, q);
         // keep t in reasonable bounds, ensuring we respect depth
@@ -226,7 +226,7 @@ namespace Raytracing {
             // ray intersects
             Vec4 rayIntersectionPoint = ray.along(t);
             Vec4 normal;
-    
+            
             // calculate triangle berry centric coords
             // first we need the vector that runs between the vertex and the intersection point for all three vertices
             // we must subtract the position of the triangle from the intersection point because this calc must happen in triangle space not world space.
@@ -234,7 +234,7 @@ namespace Raytracing {
             auto vertex1ToIntersect = theTriangle.vertex1 - (rayIntersectionPoint - position);
             auto vertex2ToIntersect = theTriangle.vertex2 - (rayIntersectionPoint - position);
             auto vertex3ToIntersect = theTriangle.vertex3 - (rayIntersectionPoint - position);
-    
+            
             // the magnitude of the cross product of two vectors is double the area formed by the triangle of their intersection.
             auto fullAreaVec = Vec4::cross(theTriangle.vertex1 - theTriangle.vertex2, theTriangle.vertex1 - theTriangle.vertex3);
             auto areaVert1Vec = Vec4::cross(vertex2ToIntersect, vertex3ToIntersect);
@@ -271,7 +271,7 @@ namespace Raytracing {
         
         return {false, Vec4(), Vec4(), 0};
     }
-
+    
     HitData ModelObject::checkIfHit(const Ray& ray, PRECISION_TYPE min, PRECISION_TYPE max) const {
         auto hResult = HitData{false, Vec4(), Vec4(), max};
         
@@ -280,7 +280,7 @@ namespace Raytracing {
         // must check through all the triangles in the object
         // respecting depth along the way
         // but reducing the max it can reach my the last longest vector length.
-        for (const auto& t : triangles) {
+        for (const auto& t: triangles) {
             auto cResult = checkIfTriangleGotHit(*t, position, ray, min, hResult.length);
             if (cResult.hit)
                 hResult = cResult;
diff --git a/Step 3/src/opencl/cl.cpp b/Step 3/src/opencl/cl.cpp
index 85353c7..8938536 100644
--- a/Step 3/src/opencl/cl.cpp	
+++ b/Step 3/src/opencl/cl.cpp	
@@ -7,14 +7,17 @@
 #include <config.h>
 
 #ifdef COMPILE_GUI
-    #define GLFW_EXPOSE_NATIVE_X11
-    #define GLFW_EXPOSE_NATIVE_GLX
-    #include <GLFW/glfw3.h>
-    #include <GLFW/glfw3native.h>
+#define GLFW_EXPOSE_NATIVE_X11
+#define GLFW_EXPOSE_NATIVE_GLX
+
+#include <GLFW/glfw3.h>
+#include <GLFW/glfw3native.h>
+
 #endif
 
 #include <cstddef>
 #include <utility>
+#include "engine/math/vectors.h"
 
 namespace Raytracing {
     
@@ -23,21 +26,23 @@ namespace Raytracing {
     void OpenCL::init() {
         openCl = std::make_shared<OpenCL>(0, 0);
     }
-    OpenCL::OpenCL(int platformID, int deviceID): m_activePlatform(platformID) {
+    
+    OpenCL::OpenCL(int platformID, int deviceID):
+            m_activePlatform(platformID) {
         m_CL_ERR = CL_SUCCESS;
         m_numPlatforms = 0;
-        m_CL_ERR = clGetPlatformIDs(0, NULL, &m_numPlatforms );
-    
+        m_CL_ERR = clGetPlatformIDs(0, NULL, &m_numPlatforms);
+        
         if (m_CL_ERR == CL_SUCCESS)
             dlog << "We found " << m_numPlatforms << " OpenCL Platforms.\n";
         else
             elog << "OpenCL Error! " << m_CL_ERR << "\n";
-    
+        
         m_platformIDs = new cl_platform_id[m_numPlatforms];
         
         m_CL_ERR = clGetPlatformIDs(m_numPlatforms, m_platformIDs, &m_numOfPlatformIDs);
         m_CL_ERR = clGetDeviceIDs(m_platformIDs[platformID], CL_DEVICE_TYPE_GPU, 1, &m_deviceID, &m_numOfDevices);
-    
+        
         printDeviceInfo(m_deviceID);
         
         cl_context_properties proper[3] = {
@@ -46,7 +51,7 @@ namespace Raytracing {
         };
         
         m_context = clCreateContext(proper, 1, &m_deviceID, NULL, NULL, &m_CL_ERR);
-    
+        
         if (m_CL_ERR != CL_SUCCESS)
             elog << "OpenCL Error Creating Context! " << m_CL_ERR << "\n";
         
@@ -78,27 +83,31 @@ namespace Raytracing {
         clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(dv), &dv, NULL);
         
         dlog << "Opening OpenCL Device!\n";
-        dlog << "Device CL String " << dv.opencl_space << dv.major << dv.dot << dv.minor << dv.space << dv.vendor<< "\n";
+        dlog << "Device CL String " << dv.opencl_space << dv.major << dv.dot << dv.minor << dv.space << dv.vendor << "\n";
         dlog << "Device Address Bits: " << deviceAddressBits << "\n";
         dlog << "Device is currently " << (deviceAvailable ? "available" : "unavailable") << "\n";
-        dlog << "Device has " << cacheSize/1024 << "kb of cache with a cache line width of " << cacheLineSize << " bytes\n";
+        dlog << "Device has " << cacheSize / 1024 << "kb of cache with a cache line width of " << cacheLineSize << " bytes\n";
         dlog << "Device " << (textureSupport ? "has" : "doesn't have") << " texture support\n";
         dlog << "Device has " << maxWorkgroups << " max workgroup size.\n";
         dlog << "Device has " << m_computeUnits << " compute units running at a max clock frequency " << m_deviceClockFreq << "\n";
         if (!textureSupport)
             elog << "Warning! The OpenCL device lacks texture support!\n";
     }
+    
     void OpenCL::createCLProgram(CLProgram& program) {
         program.loadCLShader(openCl->m_context, openCl->m_deviceID);
     }
+    
     OpenCL::~OpenCL() {
         delete[](m_platformIDs);
         clReleaseDevice(m_deviceID);
         clReleaseContext(m_context);
     }
+    
     cl_uint OpenCL::activeDeviceComputeUnits() {
         return openCl->m_computeUnits;
     }
+    
     cl_uint OpenCL::activeDeviceFrequency() {
         return openCl->m_deviceClockFreq;
     }
@@ -107,6 +116,7 @@ namespace Raytracing {
     CLProgram::CLProgram(const std::string& file) {
         m_source = ShaderLoader::loadShaderFile(file);
     }
+    
     void CLProgram::loadCLShader(cl_context context, cl_device_id deviceID) {
         this->m_context = context;
         this->m_deviceID = deviceID;
@@ -118,49 +128,41 @@ namespace Raytracing {
         
         if (m_CL_ERR != CL_SUCCESS)
             elog << "Unable to create CL program!\n";
-    
+        
         m_CL_ERR = clBuildProgram(m_program, 1, &deviceID, NULL, NULL, NULL);
         if (m_CL_ERR != CL_SUCCESS) {
             elog << "Unable to build CL program!\n";
             size_t len;
-    
+            
             clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &len);
             char buffer[len];
-    
+            
             clGetProgramBuildInfo(m_program, m_deviceID, CL_PROGRAM_BUILD_LOG, len, buffer, NULL);
             
             elog << buffer << "\n";
             
         }
     }
-    /**
-     * Buffers are the quintessential datastructures in OpenCL. They are basically regions of memory allocated to a program.
-     * @param bufferName the name of the buffer used to store internally
-     * @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
-     * @param bytes the number of bytes to be allocated.
-     */
+    
+    
     void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes) {
         // create the buffer on the GPU
         cl_mem buff = clCreateBuffer(m_context, flags, bytes, NULL, &m_CL_ERR);
         // then store it in our buffer map for easy access.
         buffers.insert({bufferName, buff});
     }
-    /**
-     * Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
-     * @param bufferName the name of the buffer used to store internally
-     * @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
-     * @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
-     * @param ptr the pointer to copy to the GPU.
-     */
+    
+    
     void CLProgram::createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr) {
         // create the buffer on the GPU
         cl_mem buff = clCreateBuffer(m_context, CL_MEM_COPY_HOST_PTR | flags, bytes, ptr, &m_CL_ERR);
         // then store it in our buffer map for easy access.
         buffers.insert({bufferName, buff});
     }
+    
     void CLProgram::createImage(const std::string& imageName, int width, int height) {
         // create the texture on the GPU
-        cl_image_format format {CL_RGBA, CL_UNORM_INT8};
+        cl_image_format format{CL_RGBA, CL_UNORM_INT8};
         cl_image_desc imageDesc;
         imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
         imageDesc.image_width = width;
@@ -171,10 +173,10 @@ namespace Raytracing {
         imageDesc.num_samples = 0;
         imageDesc.buffer = NULL;
         cl_mem tex = clCreateImage(m_context, CL_MEM_READ_WRITE, &format, &imageDesc, NULL, &m_CL_ERR);
-        if (m_CL_ERR != CL_SUCCESS){
+        if (m_CL_ERR != CL_SUCCESS) {
             elog << "Unable to create image texture!\n";
             checkBasicErrors();
-            switch (m_CL_ERR){
+            switch (m_CL_ERR) {
                 case CL_INVALID_VALUE:
                     elog << "\tFlags are not valid!\n";
                     // this is straight from the docs
@@ -208,63 +210,37 @@ namespace Raytracing {
         // then store it in our buffer map for easy access.
         buffers.insert({imageName, tex});
     }
-    /**
-     * Kernels are the entry points in OpenCL. You can have multiple of them in a single program.
-     * @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class.
-     */
+    
+    
     void CLProgram::createKernel(const std::string& kernelName) {
         auto kernel = clCreateKernel(m_program, kernelName.c_str(), &m_CL_ERR);
         if (m_CL_ERR != CL_SUCCESS)
             elog << "Unable to create CL kernel " << kernelName << "!\n";
         kernels.insert({kernelName, kernel});
     }
-    /**
-     * Allows you to bind certain buffers to a specific index in the kernel's argument list.
-     * @param kernel kernel to bind to
-     * @param buffer buffer to bind to argIndex
-     * @param argIndex the index of the argument for this buffer.
-     */
+    
     void CLProgram::setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex) {
         m_CL_ERR = clSetKernelArg(kernels[kernel], argIndex, sizeof(cl_mem), (void*) &buffers[buffer]);
         if (m_CL_ERR != CL_SUCCESS)
             elog << "Unable to bind argument " << buffer << " to CL kernel " << kernel << "!\n";
     }
-    /**
-     * Enqueues a write command to the buffer specified by the buffer name,
-     * @param buffer the buffer to write to
-     * @param bytes the number of bytes to be copied
-     * @param ptr the pointer to copy from. Must have at least bytes available
-     * @param blocking should this function wait for the bytes to be uploaded to the GPU?
-     * @param offset offset in the buffer object to write to
-     */
+    
     void CLProgram::writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) {
         m_CL_ERR = clEnqueueWriteBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL);
         if (m_CL_ERR != CL_SUCCESS)
             elog << "Unable to enqueue write to " << buffer << " buffer!\n";
     }
-    /**
-     * Enqueues a read command from the buffered specified by the buffer name.
-     * Defaults to blocking but can be set to be non-blocking.
-     * @param buffer buffer to read from
-     * @param bytes the number of bytes to read. Make sure ptr has at least those bytes available.
-     * @param ptr the ptr to write the read bytes to.
-     * @param blocking should we wait for the read or do it async?
-     * @param offset offset in the buffer to read from.
-     */
+    
     void CLProgram::readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking, size_t offset) {
         m_CL_ERR = clEnqueueReadBuffer(m_commandQueue, buffers[buffer], blocking, offset, bytes, ptr, 0, NULL, NULL);
         if (m_CL_ERR != CL_SUCCESS)
             elog << "Unable to enqueue read from " << buffer << " buffer!\n";
     }
-    /**
-     * Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
-     */
+    
     void CLProgram::flushCommands() {
         clFlush(m_commandQueue);
     }
-    /**
-     * Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
-     */
+    
     void CLProgram::finishCommands() {
         flushCommands();
         clFinish(m_commandQueue);
@@ -274,13 +250,14 @@ namespace Raytracing {
         m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], 1, globalWorkOffset, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
     }
     
-    void CLProgram::runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim, const size_t* globalWorkOffset) {
+    void
+    CLProgram::runKernel(const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim, const size_t* globalWorkOffset) {
         m_CL_ERR = clEnqueueNDRangeKernel(m_commandQueue, kernels[kernel], workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, NULL, NULL);
     }
     
     void CLProgram::readImage(const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking, size_t x, size_t y) {
-        size_t origin[3] {x, y, 0};
-        size_t region[3] {width, height, 1};
+        size_t origin[3]{x, y, 0};
+        size_t region[3]{width, height, 1};
         m_CL_ERR = clEnqueueReadImage(m_commandQueue, buffers[imageName], blocking, origin, region, 0, 0, ptr, 0, NULL, NULL);
         if (m_CL_ERR != CL_SUCCESS) {
             elog << "Unable to enqueue read from " << imageName << " image:\n";
@@ -298,15 +275,16 @@ namespace Raytracing {
     
     CLProgram::~CLProgram() {
         finishCommands();
-        for (const auto& kernel : kernels)
+        for (const auto& kernel: kernels)
             clReleaseKernel(kernel.second);
         clReleaseProgram(m_program);
-        for (const auto& buffer : buffers)
+        for (const auto& buffer: buffers)
             clReleaseMemObject(buffer.second);
         clReleaseCommandQueue(m_commandQueue);
     }
+    
     void CLProgram::checkBasicErrors() const {
-        switch (m_CL_ERR){
+        switch (m_CL_ERR) {
             case CL_OUT_OF_HOST_MEMORY:
                 elog << "\tHost is out of memory!\n";
                 break;
@@ -331,4 +309,23 @@ namespace Raytracing {
         }
     }
     
+    void CLProgram::readImage(const std::string& imageName, Image& image) {
+        // Create an array for copying bytes from the GPU to the CPU with
+        unsigned char bytes[image.getWidth() * image.getHeight() * 4];
+        for (int i = 0; i < image.getWidth() * image.getHeight() * 4; i++)
+            bytes[i] = 0;
+        // Call to OpenCL to read the image from the GPU and send it to the location of our buffer
+        this->readImage(imageName, image.getWidth(), image.getHeight(), bytes);
+        // multiplication is much faster than division,
+        // so to speed up the function precalculate the color multiplier used to convert from RGBA8 format to floating point format used by the raytracer.
+        const PRECISION_TYPE colorFactor = 1.0 / 255.0;
+        // copy the data from the buffer into the image by calculating its byte offset based on its pixel position.
+        for (int i = 0; i < image.getWidth(); i++) {
+            for (int j = 0; j < image.getHeight(); j++) {
+                const auto pixelData = bytes + (j * 4 * image.getWidth() + i * 4);
+                image.setPixelColor(i, j, {pixelData[0] * colorFactor, pixelData[1] * colorFactor, pixelData[2] * colorFactor});
+            }
+        }
+    }
+    
 }
\ No newline at end of file