240 lines
11 KiB
C++
240 lines
11 KiB
C++
/*
|
|
* Created by Brett Terpstra 6920201 on 20/11/22.
|
|
* Copyright (c) 2022 Brett Terpstra. All Rights Reserved.
|
|
*/
|
|
|
|
#ifndef STEP_3_CL_H
|
|
#define STEP_3_CL_H
|
|
|
|
// OpenCL includes
|
|
#include <CL/cl.h>
|
|
#include <CL/cl_gl.h>
|
|
#include <engine/image/image.h>
|
|
#include <config.h>
|
|
|
|
#ifdef COMPILE_GUI
|
|
|
|
#endif
|
|
|
|
|
|
#include <engine/util/std.h>
|
|
|
|
namespace Raytracing {
|
|
|
|
|
|
class CLProgram {
|
|
private:
|
|
cl_int m_CL_ERR{};
|
|
std::string m_source;
|
|
|
|
cl_device_id m_deviceID{};
|
|
cl_context m_context{};
|
|
cl_command_queue m_commandQueue{};
|
|
cl_program m_program{};
|
|
|
|
std::unordered_map<std::string, cl_mem> buffers;
|
|
std::unordered_map<std::string, cl_kernel> kernels;
|
|
|
|
/**
|
|
* Checks for some basic errors after calling OpenCL commands. Stuff like GPU out of memory... etc.
|
|
*/
|
|
void checkBasicErrors() const;
|
|
|
|
public:
|
|
/**
|
|
* Loads the shader from a file on class creation
|
|
* @param file file to load OpenCL "shader" (code) file
|
|
*/
|
|
explicit CLProgram(const std::string& file);
|
|
|
|
/**
|
|
* Used by the OpenCL class to create a basic OpenCL program
|
|
* @param context provided by the OpenCL class.
|
|
* @param deviceID provided by the OpenCL class.
|
|
*/
|
|
void loadCLShader(cl_context context, cl_device_id deviceID);
|
|
|
|
/**
|
|
* Kernels are the entry points in OpenCL. You can have multiple of them in a single program.
|
|
* @param kernelName both the name of the kernel function in the source and the reference to the kernel object used in other functions in this class.
|
|
*/
|
|
void createKernel(const std::string& kernelName);
|
|
|
|
|
|
/**
|
|
* Buffers are the quintessential datastructures in OpenCL. They are basically regions of memory allocated to a program.
|
|
* @param bufferName the name of the buffer used to store internally
|
|
* @param flags read write flags for the buffer. One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
|
|
* @param bytes the number of bytes to be allocated.
|
|
*/
|
|
void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes);
|
|
|
|
/**
|
|
* Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
|
|
* @param bufferName the name of the buffer used to store internally
|
|
* @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
|
|
* @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
|
|
* @param ptr the pointer to copy to the GPU.
|
|
*/
|
|
void createBuffer(const std::string& bufferName, cl_mem_flags flags, size_t bytes, void* ptr);
|
|
|
|
/**
|
|
* Creates a buffer on the GPU using the data pointed to by the supplied pointer. This copy happens as soon as this is called.
|
|
* @param bufferName the name of the buffer used to store internally
|
|
* @param flags One of CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE
|
|
* @param bytes the number of bytes to be allocated. Must be less than equal to the number of bytes at ptr
|
|
* @param ptr the pointer to copy to the GPU.
|
|
*/
|
|
void createImage(const std::string& imageName, int width, int height);
|
|
|
|
/**
|
|
* Allows you to bind certain buffers to a specific index in the kernel's argument list.
|
|
* @param kernel kernel to bind to
|
|
* @param buffer buffer to bind to argIndex
|
|
* @param argIndex the index of the argument for this buffer.
|
|
*/
|
|
void setKernelArgument(const std::string& kernel, const std::string& buffer, int argIndex);
|
|
|
|
/**
|
|
* Runs the kernel code on the GPU. Is blocking.
|
|
* @param kernel kernel function name to call
|
|
* @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id
|
|
* @param localWorkSize how many work items make up a work group to be executed by a kernel. 64 is recommended, must not exceed the printed value "device max workgroup size"
|
|
* @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id
|
|
*/
|
|
void runKernel(const std::string& kernel, size_t globalWorkSize, size_t localWorkSize, const size_t* globalWorkOffset = NULL);
|
|
|
|
|
|
/**
|
|
* Runs the kernel code on the GPU. Is blocking.
|
|
* This version allows you to specify the number of work dimensions.
|
|
* globalWorkSize and localWorkSize must be an array of workDim size which specify the work size for each kernel
|
|
* For example a work dim of 2 allows for two separate work sizes to be set per dimension.
|
|
* An image is two dimensional and so global work size would be {width of image, height of image}
|
|
* and local work size would be {8, 8} for a total of 64 (again recommended). Alternatively specify CL_D2_64_LOCAL_SIZE
|
|
* The resulting execution causes get_global_id(0) to run [0, width) times and get_global_id(1) to run [0, height) times
|
|
* @param kernel kernel function name to call
|
|
* @param globalWorkSize the total number of times to execute the kernel function code. Corresponds to the result of get_global_id(dim)
|
|
* @param localWorkSize how many work items make up a work group to be executed by a kernel. total 64 is recommended, total must not exceed the printed value "device max workgroup size"
|
|
* @param workDim number of dimensions to the work group being executed.
|
|
* @param globalWorkOffset not used. can be used to set an offset to the result of get_global_id
|
|
*/
|
|
void runKernel(
|
|
const std::string& kernel, size_t* globalWorkSize, size_t* localWorkSize, cl_uint workDim = 1,
|
|
const size_t* globalWorkOffset = NULL
|
|
);
|
|
|
|
/**
|
|
* Enqueues a write command to the buffer specified by the buffer name,
|
|
* @param buffer the buffer to write to
|
|
* @param bytes the number of bytes to be copied
|
|
* @param ptr the pointer to copy from. Must have at least bytes available
|
|
* @param blocking should this function wait for the bytes to be uploaded to the GPU?
|
|
* @param offset offset in the buffer object to write to
|
|
*/
|
|
void writeBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
|
|
|
|
/**
|
|
* Enqueues a read command from the buffered specified by the buffer name.
|
|
* Defaults to blocking but can be set to be non-blocking.
|
|
* @param buffer buffer to read from
|
|
* @param bytes the number of bytes to read. Make sure ptr has at least those bytes available.
|
|
* @param ptr the ptr to write the read bytes to.
|
|
* @param blocking should we wait for the read or do it async?
|
|
* @param offset offset in the buffer to read from.
|
|
*/
|
|
void readBuffer(const std::string& buffer, size_t bytes, void* ptr, cl_bool blocking = CL_TRUE, size_t offset = 0);
|
|
|
|
/**
|
|
* Reads an image from the GPU into the memory region specified. Allocated memory region must be large enough to hold the image.
|
|
* @param imageName name of the buffer to read from
|
|
* @param width width of the image. Must be less than or equal to the width of the image on the GPU
|
|
* @param height height of the image. Also must be less than or equal to the height of the image on the GPU
|
|
* @param ptr pointer to the memory region to read into
|
|
* @param blocking should we wait for the read operation to complete? Defaults to yes.
|
|
* @param x x coordinate to start the read from. Defaults to zero since it's unlikely to be needed here. Included for possible future use.
|
|
* @param y y coordinate to start the read from.
|
|
*/
|
|
void readImage(
|
|
const std::string& imageName, size_t width, size_t height, void* ptr, cl_bool blocking = CL_TRUE, size_t x = 0, size_t y = 0
|
|
);
|
|
|
|
/**
|
|
* Reads an image buffer into a RayCasting Image class.
|
|
* Image supplied must have a with and height that matches the width and height of the image buffer specified by the name.
|
|
* @param imageName name of the buffer you wish to read from
|
|
* @param image reference to an image that you want the GPU data read into.
|
|
*/
|
|
void readImage(const std::string& imageName, Image& image);
|
|
|
|
/**
|
|
* Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
|
|
*/
|
|
void flushCommands();
|
|
|
|
/**
|
|
* Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
|
|
*/
|
|
void finishCommands();
|
|
|
|
~CLProgram();
|
|
};
|
|
|
|
class OpenCL {
|
|
private:
|
|
cl_int m_CL_ERR;
|
|
cl_uint m_numPlatforms;
|
|
int m_activePlatform;
|
|
|
|
cl_platform_id* m_platformIDs;
|
|
cl_uint m_numOfPlatformIDs{};
|
|
|
|
cl_device_id m_deviceID{};
|
|
cl_uint m_numOfDevices{};
|
|
|
|
cl_uint m_computeUnits{};
|
|
cl_uint m_deviceClockFreq{};
|
|
|
|
cl_context m_context;
|
|
|
|
/**
|
|
* prints out the important info about the specified device.
|
|
* @param device device to data dump
|
|
*/
|
|
void printDeviceInfo(cl_device_id device);
|
|
|
|
public:
|
|
/**
|
|
* creates an opencl instance on the specified platform and device. Defaults to the first GPU device
|
|
*/
|
|
explicit OpenCL(int platformID = 0, int deviceID = 0);
|
|
|
|
/**
|
|
* Creates the global OpenCL instance for the engine
|
|
*/
|
|
static void init();
|
|
|
|
/**
|
|
* Creates an OpenCL program object using the global OpenCL connection
|
|
* @param program
|
|
*/
|
|
static void createCLProgram(CLProgram& program);
|
|
|
|
/**
|
|
* @return the number of compute units the device has
|
|
*/
|
|
static cl_uint activeDeviceComputeUnits();
|
|
|
|
/**
|
|
* the frequency in megahertz of the device
|
|
* @return
|
|
*/
|
|
static cl_uint activeDeviceFrequency();
|
|
|
|
~OpenCL();
|
|
};
|
|
|
|
}
|
|
|
|
#endif //STEP_3_CL_H
|