COSC-3P93-Project/Step 3/include/opencl/cl.h

/*
* Created by Brett Terpstra 6920201 on 20/11/22.
* Copyright (c) 2022 Brett Terpstra. All Rights Reserved.
*/
#ifndef STEP_3_CL_H
#define STEP_3_CL_H
// OpenCL includes
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <engine/image/image.h>
#include <config.h>
#include <engine/util/std.h>
namespace Raytracing {
class CLProgram {
private:
cl_int m_CL_ERR{};
std::string m_source;
cl_device_id m_deviceID{};
cl_context m_context{};
cl_command_queue m_commandQueue{};
cl_program m_program{};
std::unordered_map<std::string, cl_mem> buffers;
std::unordered_map<std::string, cl_kernel> kernels;
/**
* Checks for common errors after calling OpenCL commands, such as the GPU running out of memory.
*/
void checkBasicErrors() const;
public:
/**
* Loads the shader from a file on class creation
* @param file path of the OpenCL "shader" (kernel source) file to load
*/
explicit CLProgram(const std::string& file);
/**
* Used by the OpenCL class to create a basic OpenCL program
* @param context provided by the OpenCL class.
* @param deviceID provided by the OpenCL class.
*/
void loadCLShader(cl_context context, cl_device_id deviceID);
/**
* Kernels are the entry points in OpenCL. You can have multiple of them in a single program.
* @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class.
*/
void createKernel(const std::string& kernelName);
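/*
* Example (an illustrative sketch, not code from this project; the file name
* "raytrace.cl" and kernel name "raycast" are assumed):
*
*   CLProgram program("raytrace.cl");
*   OpenCL::createCLProgram(program); // compiles the source for the active device
*   program.createKernel("raycast");  // "raycast" must be a __kernel function in the source
*/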
/**
* Buffers are the fundamental data structures in OpenCL: regions of device memory allocated to a program.
* @param bufferName the name of the buffer used to store internally
* @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
* @param bytes the number of bytes to be allocated.
*/
void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes);
/**
* Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
* @param bufferName the name of the buffer used to store internally
* @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
* @param bytes the number of bytes to be allocated. Must be less than or equal to the number of bytes at ptr
* @param ptr the pointer to copy to the GPU.
*/
void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr);
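/*
* Example (a sketch; the buffer names "rays" / "output", rayCount, and the float
* data are hypothetical):
*
*   std::vector<float> hostRays(rayCount * 4);
*   // input buffer initialised from host memory at creation time
*   program.createBuffer("rays", CL_MEM_READ_ONLY, hostRays.size() * sizeof(float), hostRays.data());
*   // output buffer allocated on the device only; filled by the kernel
*   program.createBuffer("output", CL_MEM_WRITE_ONLY, rayCount * sizeof(float));
*/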
/**
* Creates a 2D image object on the GPU with the given dimensions.
* @param imageName the name of the image used to store internally
* @param width width of the image in pixels
* @param height height of the image in pixels
*/
void createImage(const std::string& imageName, int width, int height);
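/*
* Example (a sketch; the name "outputImage" and the dimensions are hypothetical):
*
*   program.createImage("outputImage", 1920, 1080); // allocates a 1920x1080 image on the GPU
*/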
/**
* Allows you to bind certain buffers to a specific index in the kernel's argument list.
* @param kernel kernel to bind to
* @param buffer buffer to bind to argIndex
* @param argIndex the index of the argument for this buffer.
*/
void setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex);
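/*
* Example (a sketch; the argument indices follow the parameter order of the
* hypothetical "raycast" kernel in the .cl source):
*
*   program.setKernelArgument("raycast", "rays", 0);   // first kernel parameter
*   program.setKernelArgument("raycast", "output", 1); // second kernel parameter
*/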
/**
* Runs the kernel code on the GPU. Is blocking.
* @param kernel kernel function name to call
* @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id
* @param localWorkSize how many work items make up a work group executed by the kernel. 64 is recommended; must not exceed the printed value "device max workgroup size"
* @param globalWorkOffset not used by default; can be used to apply an offset to the result of get_global_id
*/
void runKernel(const std::string& kernel, size_t globalWorkSize, size_t localWorkSize, const size_t* globalWorkOffset = nullptr);
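/*
* Example (a sketch; rayCount is hypothetical and assumed to be a multiple of the
* local work size of 64, as OpenCL generally requires):
*
*   program.runKernel("raycast", rayCount, 64); // blocks until all rayCount work items finish
*/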
/**
* Runs the kernel code on the GPU. Is blocking.
* This version allows you to specify the number of work dimensions.
* globalWorkSize and localWorkSize must be arrays of workDim elements, each specifying the work size for one dimension.
* For example, with a work dimension of 2 an image can use a global work size of {width of image, height of image}
* and a local work size of size_t localWork[2] = {8, 8} for a total of 64 (again recommended).
* The resulting execution makes get_global_id(0) range over [0, width) and get_global_id(1) range over [0, height).
* @param kernel kernel function name to call
* @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id(dim)
* @param localWorkSize how many work items make up a work group executed by the kernel. A total of 64 is recommended; the total must not exceed the printed value "device max workgroup size"
* @param workDim number of dimensions to the work group being executed.
* @param globalWorkOffset not used by default; can be used to apply an offset to the result of get_global_id
*/
void runKernel(
const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1,
const size_t* globalWorkOffset = nullptr
);
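/*
* Example matching the description above (a sketch; the image dimensions are
* hypothetical and assumed to be multiples of the local sizes):
*
*   size_t globalWork[2] = {1920, 1080}; // one work item per pixel
*   size_t localWork[2] = {8, 8};        // 64 work items per work group
*   program.runKernel("raycast", globalWork, localWork, 2);
*/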
/**
* Enqueues a write command to the buffer specified by the buffer name.
* @param buffer the buffer to write to
* @param bytes the number of bytes to be copied
* @param ptr the pointer to copy from. Must have at least bytes available
* @param blocking should this function wait for the bytes to be uploaded to the GPU?
* @param offset offset in the buffer object to write to
*/
void writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
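/*
* Example (a sketch; updates the hypothetical "rays" buffer created earlier):
*
*   program.writeBuffer("rays", hostRays.size() * sizeof(float), hostRays.data()); // blocks until the copy completes
*/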
/**
* Enqueues a read command from the buffer specified by the buffer name.
* Defaults to blocking but can be set to be non-blocking.
* @param buffer buffer to read from
* @param bytes the number of bytes to read. Make sure ptr has at least those bytes available.
* @param ptr the ptr to write the read bytes to.
* @param blocking should we wait for the read or do it async?
* @param offset offset in the buffer to read from.
*/
void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
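/*
* Example (a sketch; reads the hypothetical "output" buffer back into host memory):
*
*   std::vector<float> results(rayCount);
*   program.readBuffer("output", results.size() * sizeof(float), results.data()); // blocking by default
*/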
/**
* Reads an image from the GPU into the memory region specified. Allocated memory region must be large enough to hold the image.
* @param imageName name of the buffer to read from
* @param width width of the image. Must be less than or equal to the width of the image on the GPU
* @param height height of the image. Also must be less than or equal to the height of the image on the GPU
* @param ptr pointer to the memory region to read into
* @param blocking should we wait for the read operation to complete? Defaults to yes.
* @param x x coordinate to start the read from. Defaults to zero since it's unlikely to be needed here. Included for possible future use.
* @param y y coordinate to start the read from.
*/
void readImage(
const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0
);
/**
* Reads an image buffer into a Raytracing Image class.
* The supplied Image must have a width and height that match the width and height of the image buffer specified by the name.
* @param imageName name of the buffer you wish to read from
* @param image reference to an image that you want the GPU data read into.
*/
void readImage(const std::string& imageName, Image& image);
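/*
* Example (a sketch; assumes the Image class declared in engine/image/image.h can
* be constructed from a width and height, which is not shown in this header):
*
*   Image result(1920, 1080); // must match the dimensions passed to createImage
*   program.readImage("outputImage", result);
*/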
/**
* Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
*/
void flushCommands();
/**
* Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
*/
void finishCommands();
~CLProgram();
};
class OpenCL {
private:
cl_int m_CL_ERR;
cl_uint m_numPlatforms;
int m_activePlatform;
cl_platform_id* m_platformIDs;
cl_uint m_numOfPlatformIDs{};
cl_device_id m_deviceID{};
cl_uint m_numOfDevices{};
cl_uint m_computeUnits{};
cl_uint m_deviceClockFreq{};
cl_context m_context;
/**
* Prints out the important information about the specified device.
* @param device device to data dump
*/
void printDeviceInfo(cl_device_id device);
public:
/**
* Creates an OpenCL instance on the specified platform and device. Defaults to the first platform and its first GPU device.
* @param platformID index of the OpenCL platform to use
* @param deviceID index of the device on that platform
*/
explicit OpenCL(int platformID = 0, int deviceID = 0);
/**
* Creates the global OpenCL instance for the engine
*/
static void init();
/**
* Creates an OpenCL program object using the global OpenCL connection
* @param program the CLProgram to build using the global OpenCL context and device
*/
static void createCLProgram(CLProgram& program);
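/*
* Typical setup (a sketch; "raytrace.cl" is a hypothetical kernel file):
*
*   OpenCL::init();                   // create the global OpenCL instance
*   CLProgram program("raytrace.cl"); // load the kernel source
*   OpenCL::createCLProgram(program); // build it against the global context and device
*/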
/**
* @return the number of compute units the device has
*/
static cl_uint activeDeviceComputeUnits();
/**
* @return the clock frequency of the active device, in MHz
*/
static cl_uint activeDeviceFrequency();
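/*
* Example (a sketch; a rough way to log the active device's capability):
*
*   std::cout << "Compute units: " << OpenCL::activeDeviceComputeUnits()
*             << " @ " << OpenCL::activeDeviceFrequency() << " MHz\n";
*/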
~OpenCL();
};
}
#endif //STEP_3_CL_H