working compute shaders!

main
Brett 2023-04-02 14:52:03 -04:00
parent 686c966a77
commit 12b81f1363
11 changed files with 2999 additions and 34 deletions

View File

@ -13,6 +13,25 @@
#include <string>
#include <unordered_map>
/**
* This part was made for this assignment and will likely be used in future projects
*/
class compute_shader {
    private:
        // OpenGL handle of the compute shader object (0 until the constructor creates it)
        GLuint shaderID = 0;
        // OpenGL handle of the program the compute shader is attached to (0 until created)
        GLuint programID = 0;
    public:
        /**
         * Compiles and links a compute shader.
         * @param shader_source the shader source text, or a file path when loadAsString is false
         * @param loadAsString true: shader_source is the source itself (cleaned with removeEmptyFirstLines),
         *                     false: shader_source is handed to blt::fs::loadShaderFile
         */
        explicit compute_shader(const std::string& shader_source, bool loadAsString = true);
        
        // The destructor deletes the OpenGL handles stored in this object, so a copy
        // would end up deleting the same handles a second time. Copying is disabled.
        compute_shader(const compute_shader&) = delete;
        compute_shader& operator=(const compute_shader&) = delete;
        
        /**
         * Makes this shader's program the active program.
         */
        inline void bind() const {
            glUseProgram(programID);
        }
        
        /**
         * Binds the program and dispatches it.
         * @param x number of work groups in the x dimension
         * @param y number of work groups in the y dimension
         * @param z number of work groups in the z dimension
         */
        inline void execute(int x, int y, int z) const {
            bind();
            glDispatchCompute(x, y, z);
        }
        
        // defined out of line
        ~compute_shader();
};
/**
* Note: This is taken from my final project,
* https://github.com/Tri11Paragon/COSC-3P98-Final-Project/blob/main/include/render/gl.h
@ -24,16 +43,16 @@ class shader {
GLint i = -1;
};
// we can have shaders of many types in OpenGL
unsigned int programID = 0;
GLuint programID = 0;
// but we will only make use of these two for now
unsigned int vertexShaderID = 0;
unsigned int fragmentShaderID = 0;
GLuint vertexShaderID = 0;
GLuint fragmentShaderID = 0;
// while these will remain unused. (Webgl2 apparently doesn't support them despite being based on GL4.3? that's a TODO!)
unsigned int geometryShaderID = 0;
GLuint geometryShaderID = 0;
// this would be very useful; however, it is highly unlikely WebGL will support it.
// I'm leaving some of this stuff in here because I might expand the native application to use some of it.
// I'm trying to keep the web and native versions the same, though.
unsigned int tessellationShaderID = 0;
GLuint tessellationShaderID = 0;
std::unordered_map<std::string, IntDefaultedToMinusOne> uniformVars;
static unsigned int createShader(const std::string& source, int type);
@ -48,18 +67,6 @@ class shader {
return loc;
}
/**
 * Filters a shader source string line by line.
 * Empty lines and lines containing a double quote character are dropped;
 * every remaining line is kept in order and terminated with a newline.
 * @param string the text to filter
 * @return the filtered text
 */
static inline std::string removeEmptyFirstLines(const std::string& string){
    std::string kept_lines;
    for (const auto& current : blt::string::split(string, "\n")) {
        const bool drop = current.empty() || blt::string::contains(current, "\"");
        if (drop)
            continue;
        kept_lines += current;
        kept_lines += "\n";
    }
    return kept_lines;
}
public:
/**
* Creates a shader
@ -120,12 +127,6 @@ class shader {
glUseProgram(programID);
}
static void updateProjectionMatrix(const blt::mat4x4& projectionMatrix);
static void updateOrthographicMatrix(const blt::mat4x4& orthoMatrix);
static void updateViewMatrix(const blt::mat4x4& viewMatrix);
// returns the perspective view matrix which is calculated per frame. (This is for optimization)
static const blt::mat4x4& getPVM();
~shader();
};

View File

@ -29,6 +29,10 @@ void init();
void cleanup();
void runPhysicsShader();
void beginExecution();
#endif
#endif //ASSIGN3_HIGH_PERF_H

View File

@ -2,13 +2,21 @@
#include <string>
// Fragment shader source kept in a raw string literal (handed to the shader constructor in init()).
// The shader samples layer `index` of texture_array at uv_ and discards any fragment
// whose sampled alpha is below 0.1.
std::string shader_frag = R"("
#version 460
precision mediump float;
in vec2 uv_;
in float index;
out vec4 out_color;
uniform mediump sampler2DArray texture_array;
void main() {
out_color = vec4(uv_, 0.0, 1.0);
//out_color = vec4(uv_, 0.0, 1.0);
out_color = texture(texture_array, vec3(uv_, index));
if (out_color.a < 0.1)
discard;
}
")";

View File

@ -0,0 +1,77 @@
#ifdef __cplusplus
#include <string>
std::string shader_physics = R"("
#version 460
// execute 1 shader per particle. 128 executions per group
layout (local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
// due to byte alignment, storing pos and dir as vec4 was required anyways.
// pos: xyz is the particle position, w is passed through untouched by this shader
//      (the vertex shader forwards it as the texture index).
// dir: xyz is the velocity, w accumulates elapsed time and is compared against particle_lifetime.
struct particle_t {
vec4 pos;
vec4 dir;
};
// number of entries in the offsets buffer; lookups wrap around with i % offset_size
const int offset_size = 8192;
// scales the distance moved per step
const float SPEED = 1.0f;
// scales inital_dir when a particle is reset
const float SPEED_FACTOR = 25.0f;
// fraction of the (negated) vertical velocity kept after a bounce
const float BOUNCE_FACTOR = 0.75f;
// scales the precomputed offset added to the launch direction on reset
const float SPREAD = 4.5f;
// a particle is reset once dir.w exceeds this value
const float particle_lifetime = 10.0f;
// exclusive lower and upper corners of the region tested by checkBounds
const vec2 p_min = vec2(-50, -50);
const vec2 p_max = vec2(50, 50);
// position and direction assigned to a particle when it is reset
const vec3 inital_pos = vec3(0.0f, 1.0f, 0.0f);
const vec4 inital_dir = vec4(0.0f, 1.0f, 0.0f, 0.0f);
// acceleration added to dir every step, scaled by deltaSeconds
const vec4 GRAVITY = vec4(0.0, -9.8, 0.0, 0.0);
// storage block at binding 0: one particle_t per particle
layout (std430, binding=0) buffer particle_buffer {
particle_t particles[];
};
// storage block at binding 1: precomputed offsets read by resetParticle
layout (std430, binding=1) buffer offset_buffer {
vec4 offsets[offset_size];
};
// time step added to dir.w and used to scale movement and gravity
layout(location=0) uniform float deltaSeconds;
// Puts particle i back at the launch point with a fresh launch direction.
// i: index into particles, w: value stored in the w component of the new position
void resetParticle(uint i, float w) {
    // pick this particle's precomputed offset, wrapping around the offsets table
    vec4 spread = offsets[i % offset_size] * SPREAD;
    particles[i].dir = inital_dir * SPEED_FACTOR + spread;
    particles[i].pos = vec4(inital_pos, w);
}
// Returns true when both components of pos lie strictly between p_min and p_max.
bool checkBounds(vec2 pos) {
    bool above_min = all(greaterThan(pos, p_min));
    bool below_max = all(lessThan(pos, p_max));
    return above_min && below_max;
}
// Advances one particle per invocation: ages it, moves it, applies gravity,
// bounces it off the ground platform and resets it when it expires or falls away.
void main() {
    uint i = gl_GlobalInvocationID.x;
    // invocations past the end of the particle buffer have nothing to update
    if (i >= uint(particles.length()))
        return;

    vec4 pos = particles[i].pos;
    vec4 dir = particles[i].dir;

    // dir.w tracks how long the particle has been alive
    dir.w += deltaSeconds;

    if (dir.w > particle_lifetime) {
        // pos.w is carried over so the particle keeps its w value across resets
        resetParticle(i, pos.w);
        return;
    }

    // only xyz take part in the motion, the w components are left alone
    pos += vec4(dir.xyz * SPEED * deltaSeconds, 0.0);
    dir += vec4(GRAVITY.xyz * deltaSeconds, 0.0);

    // the ground is the y = 0 plane, so the platform bounds are tested in x and z
    if (pos.y < 0 && checkBounds(pos.xz)) {
        dir.y = -dir.y * BOUNCE_FACTOR;
        pos.y = 0;
    }

    particles[i].dir = dir;
    particles[i].pos = pos;

    // particles that missed the platform are recycled once they fall far enough
    if (pos.y < -50)
        resetParticle(i, pos.w);
}
")";
#endif

View File

@ -9,12 +9,14 @@ layout (location = 2) in vec4 pos;
layout (location = 3) in vec4 dir;
out vec2 uv_;
out float index;
uniform mat4 pvm;
void main() {
    // forward the texture coordinate (OpenGL interpolates this per fragment)
    uv_ = uv;
    // hand the w component of the per-particle position to the fragment stage
    index = pos.w;
    // move the vertex by the particle's position, then project it
    vec3 translated = vertex + pos.xyz;
    gl_Position = pvm * vec4(translated, 1.0);
}

2634
include/stb_image_resize.h Normal file

File diff suppressed because it is too large Load Diff

@ -1 +1 @@
Subproject commit 289af1317141c53e04998b32760efced0956db57
Subproject commit 08c542658240b60d1221e3580f3c6b4a2f483919

View File

@ -12,6 +12,18 @@
* https://github.com/Tri11Paragon/COSC-3P98-Final-Project/blob/main/include/render/gl.h
*/
/**
 * Rebuilds a shader source string without its empty lines and without any
 * line that contains a double quote character. Kept lines stay in their
 * original order and each one is followed by a newline.
 * @param string the text to filter
 * @return the filtered text
 */
static inline std::string removeEmptyFirstLines(const std::string& string){
    auto pieces = blt::string::split(string, "\n");
    std::string result;
    for (const auto& piece : pieces) {
        if (piece.empty())
            continue;
        if (blt::string::contains(piece, "\""))
            continue;
        result += piece;
        result += "\n";
    }
    return result;
}
unsigned int shader::createShader(const std::string& source, int type) {
const char* shader_code = source.c_str();
// creates a Shader
@ -154,3 +166,53 @@ shader::shader(shader&& move) noexcept {
// by setting the program ID to -1 we tell the shader it has been moved.
move.programID = -1;
}
/**
* This part was made for this assignment and will likely be used in future projects
*/
/**
 * Compiles a compute shader and links it into its own program.
 * Compile and link failures are logged; construction still completes.
 * @param shader_source the shader source text, or a file path when loadAsString is false
 * @param loadAsString true: treat shader_source as source text and clean it with removeEmptyFirstLines,
 *                     false: load the source through blt::fs::loadShaderFile
 */
compute_shader::compute_shader(const std::string& shader_source, bool loadAsString) {
    GLint status = GL_FALSE;
    
    std::string source;
    if (!loadAsString)
        source = blt::fs::loadShaderFile(shader_source);
    else
        source = removeEmptyFirstLines(shader_source);
    
    // glShaderSource wants an array of C strings; source must outlive the call
    const char* c_source = source.c_str();
    
    shaderID = glCreateShader(GL_COMPUTE_SHADER);
    glShaderSource(shaderID, 1, &c_source, nullptr);
    glCompileShader(shaderID);
    
    glGetShaderiv(shaderID, GL_COMPILE_STATUS, &status);
    if (!status){
        GLint log_length = 0;
        glGetShaderiv(shaderID, GL_INFO_LOG_LENGTH, &log_length);
        
        // one extra element so the log is always terminated
        blt::scoped_buffer<GLchar> infoLog{static_cast<unsigned long>(log_length + 1)};
        
        glGetShaderInfoLog(shaderID, log_length + 1, nullptr, infoLog.buffer);
        BLT_ERROR("Unable to compile compute shader! (%d)", log_length);
        // the driver log is data, not a format string
        BLT_ERROR("%s", infoLog.buffer);
    }
    
    programID = glCreateProgram();
    glAttachShader(programID, shaderID);
    glLinkProgram(programID);
    
    // link status belongs to the program object, not the shader object
    glGetProgramiv(programID, GL_LINK_STATUS, &status);
    if (!status){
        GLint log_length = 0;
        glGetProgramiv(programID, GL_INFO_LOG_LENGTH, &log_length);
        
        blt::scoped_buffer<GLchar> infoLog{static_cast<unsigned long>(log_length + 1)};
        
        glGetProgramInfoLog(programID, log_length + 1, nullptr, infoLog.buffer);
        BLT_ERROR("Unable to link compute shader!");
        BLT_ERROR("%s", infoLog.buffer);
    }
}
/**
 * Releases both OpenGL objects created by the constructor.
 * OpenGL silently ignores a handle of 0, so this is safe even if creation failed.
 */
compute_shader::~compute_shader() {
    // the shader is only flagged for deletion while it is still attached;
    // deleting the program detaches it and lets the driver free both
    glDeleteShader(shaderID);
    glDeleteProgram(programID);
}

View File

@ -3,6 +3,7 @@
* Licensed under GNU General Public License V3.0
* See LICENSE file for license detail
*/
#include <locale.h>
#include <high_perf/gl_util.h>
#include <modes/high_perf.h>
#include <util.h>
@ -10,6 +11,9 @@
#include "blt/std/memory.h"
#include <shaders/vertex.vert>
#include <shaders/fragment.frag>
#include <shaders/physics.comp>
#include <stb_image.h>
#include <stb_image_resize.h>
//static inline float degreesToRadian(float deg) {
// return deg * (float)M_PI / 180.0f;
@ -32,13 +36,26 @@ void window_resize(int width, int height) {
}
GLuint particleTranslationsVBO;
// -------{VBO}-------
GLuint particleTranslationsBuffer;
GLuint particleOffsetsBuffer;
GLuint verticesVBO;
GLuint uvsVBO;
GLuint indicesEBO;
// -------{VAO}-------
GLuint particleVAO;
const unsigned int particle_count = 25000000;
// -------{Textures}-------
GLuint textureArrayID;
const unsigned int TEXTURE_COUNT = 10;
const unsigned int TEXTURE_WIDTH = 512;
const unsigned int TEXTURE_HEIGHT = 512;
// -------{Particles}-------
const unsigned int particle_count = 128 * 10000;
const unsigned int offset_count = 8192;
// generally alignment to multiples of 4 floats helps performance, plus we can use that extra space for info we need.
typedef struct {
@ -69,19 +86,43 @@ blt::mat4x4 perspectiveMatrix;
blt::mat4x4 viewMatrix;
shader* instance_shader;
compute_shader* physics_shader;
// Recomputes the global viewMatrix from createViewMatrix(); render() calls this
// before building the combined perspective * view matrix.
void updateView() {
viewMatrix = createViewMatrix();
}
// runPhysicsShader() does nothing until this flag has been raised
bool execute{false};

/**
 * Raises the execute flag so that the physics shader starts running.
 * Calling it again has no further effect.
 */
void beginExecution() {
    execute = true;
}
void runPhysicsShader(){
if (!execute)
return;
physics_shader->bind();
glUniform1f(0, (float)((double) getDelta() / 1000000000.0));
glBindBuffer(GL_SHADER_STORAGE_BUFFER, particleTranslationsBuffer);
physics_shader->execute(particle_count / 128, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
void render() {
updateView();
perspectiveMatrix = blt::perspective(FOV, (float)WINDOW_WIDTH / (float)WINDOW_HEIGHT, 0.1f, 1000.0f);
auto pvm = perspectiveMatrix * viewMatrix;
runPhysicsShader();
instance_shader->bind();
instance_shader->setMatrix("pvm", pvm);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D_ARRAY, textureArrayID);
glBindVertexArray(particleVAO);
glDrawElementsInstanced(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0, particle_count);
glBindVertexArray(0);
@ -89,23 +130,65 @@ void render() {
}
void init() {
blt::scoped_buffer<particle_record> translations{particle_count};
blt::random<float> pos{-50.0, 50.0};
BLT_DEBUG("High performance subsystem init");
setlocale(LC_NUMERIC, "");
BLT_INFO("Using %'d particle count", particle_count);
BLT_INFO("Loading %d texture(s) of size (%d, %d)", TEXTURE_COUNT, TEXTURE_WIDTH, TEXTURE_HEIGHT);
BLT_TRACE("Checking system constants");
// number of work groups allowed (min: 65535)
GLint workGroupCountX;
GLint workGroupCountY;
GLint workGroupCountZ;
// max local size of the work groups (min: 1024, 1024, 64)
GLint workGroupSize;
// max number of work group invocations (min: 1024)
GLint workGroupInvocations;
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &workGroupCountX);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &workGroupCountY);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &workGroupCountZ);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &workGroupSize);
glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, 0, &workGroupInvocations);
BLT_INFO("This system's OpenGL supports (%d, %d, %d) work groups", workGroupCountX, workGroupCountY, workGroupCountZ);
BLT_INFO("\tLocal group max total size: %d", workGroupSize);
BLT_INFO("\tMax work group invocations: %d", workGroupInvocations);
blt::scoped_buffer<particle_record> translations{particle_count};
blt::scoped_buffer<vec4> offsets{offset_count};
blt::random<float> dir{-1, 1};
blt::random<float> lifetime{0, 25};
BLT_TRACE("Creating particles");
for (int i = 0; i < particle_count; i++)
translations[i] = particle_record{vec4{pos.get(), pos.get() / 2, pos.get(), (float)(i % 10)}, vec4{0, 0, 0, 0}};
translations[i] = particle_record{vec4{0, 1, 0, (float)(i % 10)}, vec4{0, 1, 0, lifetime.get()}};
for (int i = 0; i < offset_count; i++) {
blt::vec2 v {dir.get(), dir.get()};
v = v.normalize();
offsets[i] = vec4{v[0], 0, v[1], 0};
}
// ----------------------------------
// Create OpenGL Objects
// ----------------------------------
BLT_TRACE("Creating OpenGL objects");
// create our VAO
glGenVertexArrays(1, &particleVAO);
// create our VBOs
glGenBuffers(1, &particleTranslationsVBO);
glGenBuffers(1, &particleTranslationsBuffer);
glGenBuffers(1, &particleOffsetsBuffer);
glGenBuffers(1, &verticesVBO);
glGenBuffers(1, &uvsVBO);
glGenBuffers(1, &indicesEBO);
// create our texture
glGenTextures(1, &textureArrayID);
// ----------------------------------
// Upload/Assign OpenGL Objects
// ----------------------------------
BLT_TRACE("Uploading VBO data and assigning to VAO");
glBindVertexArray(particleVAO);
// bind and upload vertices data to the GPU
@ -124,7 +207,7 @@ void init() {
glEnableVertexAttribArray(1);
int translations_size = sizeof(particle_record) * particle_count;
glBindBuffer(GL_ARRAY_BUFFER, particleTranslationsVBO);
glBindBuffer(GL_ARRAY_BUFFER, particleTranslationsBuffer);
glBufferData(GL_ARRAY_BUFFER, translations_size, translations.buffer, GL_DYNAMIC_DRAW); // allocate some memory on the GPU
glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, sizeof(particle_record), (void*) 0);
glVertexAttribPointer(3, 4, GL_FLOAT, GL_FALSE, sizeof(particle_record), (void*) offsetof(particle_record, dir));
@ -134,18 +217,108 @@ void init() {
glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3);
// allow the particle buffer to be used in the compute shader!
glBindBuffer(GL_SHADER_STORAGE_BUFFER, particleTranslationsBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, particleTranslationsBuffer);
// generating random numbers on the GPU is hard, we can use enough precomputed random offsets to simulate real time randomness
glBindBuffer(GL_SHADER_STORAGE_BUFFER, particleOffsetsBuffer);
glBufferData(GL_SHADER_STORAGE_BUFFER, offset_count * sizeof(vec4), offsets.buffer, GL_STATIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, particleOffsetsBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesEBO);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(float) * 6, indices, GL_STATIC_DRAW);
// ----------------------------------
// Texturing
// ----------------------------------
BLT_TRACE("Creating array texture");
// based on my final project's texture array implementation
glBindTexture(GL_TEXTURE_2D_ARRAY, textureArrayID);
// allocate immutable storage for our textures
// we can change what is stored inside the texture, but we cannot change its size
// which is why we need to be specific here about the type of data we will be storing.
glTexStorage3D(GL_TEXTURE_2D_ARRAY, 4, GL_RGBA8, TEXTURE_WIDTH, TEXTURE_HEIGHT, TEXTURE_COUNT);
std::string texture_locations[TEXTURE_COUNT] = {
"wayland.png",
"SPONGEBOB_YOUTUBE.jpg",
"1618325873904.png",
"1665624414712991.jpg",
"stonks.png",
"yak.png",
"penguin.jpg",
"fFTkb.png",
"depression.png",
"1665624414712991.jpg"
};
constexpr int channel_count = 4;
int level = 0;
stbi_set_flip_vertically_on_load(true);
// Load every texture, bring it to TEXTURE_WIDTH x TEXTURE_HEIGHT and upload it
// into its own layer of the array texture.
for (const std::string& texture_loc : texture_locations){
    // a texture always owns the layer matching its position in texture_locations,
    // even when loading fails, so later textures do not shift into the wrong layer
    const int layer = level++;
    
    // load the texture
    const std::string path = "resources/" + texture_loc;
    int width = 0, height = 0, channels = 0;
    unsigned char* data = stbi_load(path.c_str(), &width, &height, &channels, channel_count);
    if (data == nullptr) {
        BLT_WARN("Unable to load texture %s (%s)", path.c_str(), stbi_failure_reason());
        continue;
    }
    
    unsigned char* resized_data = data;
    // resize if necessary
    if (static_cast<unsigned int>(width) != TEXTURE_WIDTH || static_cast<unsigned int>(height) != TEXTURE_HEIGHT){
        // needs to be malloc since stbi_image_free is just free()
        auto* output_data = static_cast<unsigned char*>(malloc(
                TEXTURE_WIDTH * TEXTURE_HEIGHT * channel_count
        ));
        // stbir_resize_uint8 returns 1 on success and 0 on failure
        const bool resized = output_data != nullptr && stbir_resize_uint8(
                // input
                data, width, height, 0,
                // output
                output_data, TEXTURE_WIDTH, TEXTURE_HEIGHT, 0,
                // channels
                channel_count
        );
        stbi_image_free(data);
        if (!resized) {
            BLT_WARN("Error resizing block texture image!");
            // nothing valid to upload for this layer
            free(output_data);
            continue;
        }
        resized_data = output_data;
    }
    
    // upload image to the gpu
    glTexSubImage3D(
            GL_TEXTURE_2D_ARRAY, 0, 0, 0, layer, TEXTURE_WIDTH, TEXTURE_HEIGHT, 1,
            GL_RGBA, GL_UNSIGNED_BYTE, resized_data
    );
    stbi_image_free(resized_data);
}
// repeat the texture when sampling outside of [0, 1]
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_REPEAT);
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_REPEAT);
// trilinear filtering when the texture is minified
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
// magnification never uses mipmaps: only GL_NEAREST and GL_LINEAR are valid here,
// a mipmap filter would raise GL_INVALID_ENUM and leave the parameter unchanged
glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// Anisotropy helps preserve textures at oblique angles
float a = 0;
glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &a);
glTexParameterf(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_ANISOTROPY_EXT, a);
// mipmaps reduce resolution of textures as the distance to them increases
glGenerateMipmap(GL_TEXTURE_2D_ARRAY);
BLT_TRACE("Loading shaders");
instance_shader = new shader(shader_vert, shader_frag, "", true);
physics_shader = new compute_shader(shader_physics);
BLT_DEBUG("High performance subsystem init complete!");
}
void cleanup() {
// cleanup opengl resources
glDeleteVertexArrays(1, &particleVAO);
glDeleteBuffers(1, &particleTranslationsVBO);
glDeleteBuffers(1, &particleTranslationsBuffer);
glDeleteBuffers(1, &verticesVBO);
glDeleteBuffers(1, &uvsVBO);
glDeleteBuffers(1, &indicesEBO);

View File

@ -140,6 +140,8 @@ int main(int argc, char** argv) {
fountain->toggleSpray();
if (key == 't')
fountain->toggleTexRandomizer();
if (key == 'p')
beginExecution();
});
glutSpecialFunc([](int k, int x, int y) -> void {
cam.specialPress(k);

View File

@ -5,6 +5,8 @@
#include <texture.h>
#define STB_IMAGE_IMPLEMENTATION
#include <stb_image.h>
#define STB_IMAGE_RESIZE_IMPLEMENTATION
#include <stb_image_resize.h>
texture* loadTexture(const std::string& path) {