diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c8a7c2..64dce66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.20) include(cmake/color.cmake) -set(BLT_VERSION 0.16.16) +set(BLT_VERSION 0.16.17) set(BLT_TEST_VERSION 0.0.1) set(BLT_TARGET BLT) diff --git a/include/blt/math/matrix.h b/include/blt/math/matrix.h index cb470ef..526adbb 100644 --- a/include/blt/math/matrix.h +++ b/include/blt/math/matrix.h @@ -9,6 +9,7 @@ #include #include +#include #ifndef M_PI // MSVC does not have M_PI @@ -20,14 +21,16 @@ namespace blt class mat4x4 { + static_assert(std::is_trivially_copyable_v && "Vector must be trivially copyable!"); protected: // 4x4 = 16 - union dataType - { - float single[16]; - float dim[4][4]; - }; - dataType data{}; +// union dataType +// { +// float single[16]; +// float dim[4][4]; +// blt::vec4 v[4]; +// }; + blt::vec4 data[4]; friend mat4x4 operator+(const mat4x4& left, const mat4x4& right); @@ -44,10 +47,20 @@ namespace blt friend mat4x4 operator/(float c, const mat4x4& v); public: + static mat4x4 make_empty() + { + mat4x4 ret; + ret.m00(0); + ret.m11(0); + ret.m22(0); + ret.m33(0); + return ret; + } + mat4x4() { - for (float& i : data.single) - i = 0; +// for (float& i : data.single) +// i = 0; // set identity matrix default m00(1); m11(1); @@ -58,17 +71,21 @@ namespace blt mat4x4(const blt::vec4& c1, const blt::vec4& c2, const blt::vec4& c3, const blt::vec4& c4) { // dangerous? 
- std::memcpy(data.dim[0], c1.data(), 4 * sizeof(float)); - std::memcpy(data.dim[1], c2.data(), 4 * sizeof(float)); - std::memcpy(data.dim[2], c3.data(), 4 * sizeof(float)); - std::memcpy(data.dim[3], c4.data(), 4 * sizeof(float)); +// std::memcpy(data.dim[0], c1.data(), 4 * sizeof(float)); +// std::memcpy(data.dim[1], c2.data(), 4 * sizeof(float)); +// std::memcpy(data.dim[2], c3.data(), 4 * sizeof(float)); +// std::memcpy(data.dim[3], c4.data(), 4 * sizeof(float)); + data[0] = c1; + data[1] = c2; + data[2] = c3; + data[3] = c4; } mat4x4(const mat4x4& mat) { - for (int i = 0; i < 16; i++) + for (int i = 0; i < 4; i++) { - data.single[i] = mat.data.single[i]; + data[i] = mat.data[i]; } } @@ -76,19 +93,24 @@ namespace blt { if (&copy == this) return *this; - for (int i = 0; i < 16; i++) + for (int i = 0; i < 4; i++) { - data.single[i] = copy.data.single[i]; + data[i] = copy.data[i]; } return *this; } explicit mat4x4(const float dat[16]) { - for (int i = 0; i < 16; i++) - { - data.single[i] = dat[i]; - } + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + data[i][j] = dat[j + i * 4]; + } + + explicit mat4x4(const blt::vec4 dat[4]) + { + for (int i = 0; i < 4; i++) + data[i] = dat[i]; } inline mat4x4& translate(float x, float y, float z) @@ -203,63 +225,80 @@ namespace blt [[nodiscard]] mat4x4 adjugate() const { - mat4x4 ad; - ad.w11(w22() * w33() * w44() + w23() * w34() * w42() + w24() * w32() * w43() - - w24() * w33() * w42() - w23() * w32() * w44() - w22() * w34() * w43()); - ad.w12(w21() * w33() * w44() + w23() * w34() * w41() + w24() * w31() * w43() - - w24() * w33() * w41() - w23() * w31() * w44() - w21() * w34() * w43()); - ad.w13(w21() * w32() * w44() + w22() * w34() * w41() + w24() * w31() * w42() - - w24() * w32() * w41() - w22() * w31() * w44() - w21() * w34() * w42()); - ad.w14(w21() * w32() * w43() + w22() * w33() * w41() + w23() * w31() * w42() - - w23() * w32() * w41() - w22() * w31() * w43() - w21() * w33() * w42()); + auto& m = *this; + auto 
Coef00 = m[2][2] * m[3][3] - m[3][2] * m[2][3]; + auto Coef02 = m[1][2] * m[3][3] - m[3][2] * m[1][3]; + auto Coef03 = m[1][2] * m[2][3] - m[2][2] * m[1][3]; - ad.w21(w12() * w33() * w44() + w13() * w34() * w42() + w14() * w32() * w43() - - w14() * w33() * w42() - w13() * w32() * w44() - w12() * w34() * w43()); - ad.w22(w11() * w33() * w44() + w13() * w34() * w41() + w14() * w31() * w43() - - w14() * w33() * w41() - w13() * w31() * w44() - w11() * w34() * w43()); - ad.w23(w11() * w32() * w44() + w12() * w34() * w41() + w14() * w31() * w42() - - w14() * w32() * w41() - w12() * w31() * w44() - w11() * w34() * w42()); - ad.w24(w11() * w32() * w43() + w12() * w33() * w41() + w13() * w31() * w42() - - w13() * w32() * w41() - w12() * w31() * w43() - w11() * w33() * w42()); + auto Coef04 = m[2][1] * m[3][3] - m[3][1] * m[2][3]; + auto Coef06 = m[1][1] * m[3][3] - m[3][1] * m[1][3]; + auto Coef07 = m[1][1] * m[2][3] - m[2][1] * m[1][3]; - ad.w31(w12() * w23() * w44() + w13() * w24() * w42() + w14() * w22() * w43() - - w14() * w23() * w42() - w13() * w22() * w44() - w12() * w24() * w43()); - ad.w32(w11() * w23() * w44() + w13() * w24() * w41() + w14() * w21() * w43() - - w14() * w23() * w41() - w13() * w21() * w44() - w11() * w24() * w43()); - ad.w33(w11() * w22() * w44() + w12() * w24() * w41() + w14() * w21() * w42() - - w14() * w22() * w41() - w12() * w21() * w44() - w11() * w24() * w42()); - ad.w34(w11() * w22() * w43() + w12() * w23() * w41() + w13() * w21() * w42() - - w13() * w22() * w41() - w12() * w21() * w43() - w11() * w23() * w42()); + auto Coef08 = m[2][1] * m[3][2] - m[3][1] * m[2][2]; + auto Coef10 = m[1][1] * m[3][2] - m[3][1] * m[1][2]; + auto Coef11 = m[1][1] * m[2][2] - m[2][1] * m[1][2]; - ad.w41(w12() * w23() * w34() + w13() * w24() * w32() + w14() * w22() * w33() - - w14() * w23() * w32() - w13() * w22() * w34() - w12() * w24() * w33()); - ad.w42(w11() * w23() * w34() + w13() * w24() * w31() + w14() * w21() * w33() - - w14() * w23() * w31() - w13() * 
w21() * w34() - w11() * w24() * w33()); - ad.w43(w11() * w22() * w34() + w12() * w24() * w31() + w14() * w21() * w32() - - w14() * w22() * w31() - w12() * w21() * w34() - w11() * w24() * w32()); - ad.w44(w11() * w22() * w33() + w12() * w23() * w31() + w13() * w21() * w32() - - w13() * w22() * w31() - w12() * w21() * w33() - w11() * w23() * w32()); + auto Coef12 = m[2][0] * m[3][3] - m[3][0] * m[2][3]; + auto Coef14 = m[1][0] * m[3][3] - m[3][0] * m[1][3]; + auto Coef15 = m[1][0] * m[2][3] - m[2][0] * m[1][3]; - for (int i = 1; i <= 4; i++) - { - for (int j = 1; j <= 4; j++) - { - auto v = static_cast(std::pow(-1, j + i)); - ad.w(j, i, v * ad.w(j, i)); - } - } - return ad; + auto Coef16 = m[2][0] * m[3][2] - m[3][0] * m[2][2]; + auto Coef18 = m[1][0] * m[3][2] - m[3][0] * m[1][2]; + auto Coef19 = m[1][0] * m[2][2] - m[2][0] * m[1][2]; + + auto Coef20 = m[2][0] * m[3][1] - m[3][0] * m[2][1]; + auto Coef22 = m[1][0] * m[3][1] - m[3][0] * m[1][1]; + auto Coef23 = m[1][0] * m[2][1] - m[2][0] * m[1][1]; + + blt::vec4 Fac0(Coef00, Coef00, Coef02, Coef03); + blt::vec4 Fac1(Coef04, Coef04, Coef06, Coef07); + blt::vec4 Fac2(Coef08, Coef08, Coef10, Coef11); + blt::vec4 Fac3(Coef12, Coef12, Coef14, Coef15); + blt::vec4 Fac4(Coef16, Coef16, Coef18, Coef19); + blt::vec4 Fac5(Coef20, Coef20, Coef22, Coef23); + + blt::vec4 Vec0(m[1][0], m[0][0], m[0][0], m[0][0]); + blt::vec4 Vec1(m[1][1], m[0][1], m[0][1], m[0][1]); + blt::vec4 Vec2(m[1][2], m[0][2], m[0][2], m[0][2]); + blt::vec4 Vec3(m[1][3], m[0][3], m[0][3], m[0][3]); + + blt::vec4 Inv0(Vec1 * Fac0 - Vec2 * Fac1 + Vec3 * Fac2); + blt::vec4 Inv1(Vec0 * Fac0 - Vec2 * Fac3 + Vec3 * Fac4); + blt::vec4 Inv2(Vec0 * Fac1 - Vec1 * Fac3 + Vec3 * Fac5); + blt::vec4 Inv3(Vec0 * Fac2 - Vec1 * Fac4 + Vec2 * Fac5); + + blt::vec4 SignA(+1, -1, +1, -1); + blt::vec4 SignB(-1, +1, -1, +1); + return mat4x4(Inv0 * SignA, Inv1 * SignB, Inv2 * SignA, Inv3 * SignB); } [[nodiscard]] mat4x4 inverse() const { - auto ad = adjugate(); - auto d = 1 / 
determinant(); - return d * ad; + auto& m = *this; + auto Inverse = adjugate(); + + blt::vec4 Row0(Inverse[0][0], Inverse[1][0], Inverse[2][0], Inverse[3][0]); + + blt::vec4 Dot0(m[0] * Row0); + auto Dot1 = (Dot0.x() + Dot0.y()) + (Dot0.z() + Dot0.w()); + + auto OneOverDeterminant = 1.0f / Dot1; + + return Inverse * OneOverDeterminant; + } + + inline const blt::vec4& operator[](int column) const + { + return data[column]; + } + + inline blt::vec4& operator[](int column) + { + return data[column]; } [[nodiscard]] inline float m(int row, int column) const - { return data.single[row + column * 4]; }; + { return data[column][row]; }; [[nodiscard]] inline float m00() const { return m(0, 0); } @@ -310,7 +349,7 @@ namespace blt { return m(3, 3); } inline float m(int row, int column, float value) - { return data.single[row + column * 4] = value; }; + { return data[column][row] = value; }; inline float m00(float d) { return m(0, 0, d); } @@ -361,7 +400,7 @@ namespace blt { return m(3, 3, d); } [[nodiscard]] inline float w(int row, int column) const - { return data.single[(row - 1) + (column - 1) * 4]; }; + { return data[column - 1][row - 1]; }; [[nodiscard]] inline float w11() const { return m(0, 0); } @@ -412,7 +451,7 @@ namespace blt { return m(3, 3); } inline float w(int row, int column, float value) - { return data.single[(row - 1) + (column - 1) * 4] = value; }; + { return data[column - 1][row - 1] = value; }; inline float w11(float d) { return m(0, 0, d); } @@ -463,25 +502,25 @@ namespace blt { return m(3, 3, d); } inline float* ptr() - { return data.single; } + { return data[0].data(); } }; // adds the two mat4x4 left and right inline mat4x4 operator+(const mat4x4& left, const mat4x4& right) { - float data[16]; - for (int i = 0; i < 16; i++) - data[i] = left.data.single[i] + right.data.single[i]; - return mat4x4{data}; + mat4x4 ret = left; + for (int i = 0; i < 4; i++) + ret[i] += right.data[i]; + return ret; } // subtracts the right mat4x4 from the left. 
inline mat4x4 operator-(const mat4x4& left, const mat4x4& right) { - float data[16]; - for (int i = 0; i < 16; i++) - data[i] = left.data.single[i] - right.data.single[i]; - return mat4x4{data}; + mat4x4 ret = left; + for (int i = 0; i < 4; i++) + ret[i] -= right.data[i]; + return ret; } // since matrices are made identity by default, we need to create the result collector matrix without identity @@ -494,7 +533,7 @@ namespace blt // multiples the left with the right inline mat4x4 operator*(const mat4x4& left, const mat4x4& right) { - mat4x4 mat{emptyMatrix}; + mat4x4 mat = mat4x4::make_empty(); // TODO: check avx with this?? for (int i = 0; i < 4; i++) @@ -542,9 +581,9 @@ namespace blt { mat4x4 mat{}; - for (int i = 0; i < 16; i++) + for (int i = 0; i < 4; i++) { - mat.data.single[i] = c * v.data.single[i]; + mat.data[i] = c * v.data[i]; } return mat; @@ -555,9 +594,9 @@ namespace blt { mat4x4 mat{}; - for (int i = 0; i < 16; i++) + for (int i = 0; i < 4; i++) { - mat.data.single[i] = v.data.single[i] * c; + mat.data[i] = v.data[i] * c; } return mat; @@ -568,9 +607,9 @@ namespace blt { mat4x4 mat{}; - for (int i = 0; i < 16; i++) + for (int i = 0; i < 4; i++) { - mat.data.single[i] = v.data.single[i] / c; + mat.data[i] = v.data[i] / c; } return mat; @@ -581,9 +620,10 @@ namespace blt { mat4x4 mat{}; - for (int i = 0; i < 16; i++) + for (int i = 0; i < 4; i++) { - mat.data.single[i] = c / v.data.single[i]; + for (int j = 0; j < 4; j++) + mat.data[i][j] = c / v.data[i][j]; } return mat; @@ -594,7 +634,7 @@ namespace blt // http://www.songho.ca/opengl/gl_projectionmatrix.html static inline mat4x4 perspective(float fov, float aspect_ratio, float near, float far) { - mat4x4 perspectiveMat4x4{emptyMatrix}; + mat4x4 perspectiveMat4x4 = mat4x4::make_empty(); float halfTan = tanf(fov * 0.5f * (float) M_PI / 180.0f); perspectiveMat4x4.m00(float(1.0 / (aspect_ratio * halfTan))); @@ -608,7 +648,7 @@ namespace blt static inline mat4x4 ortho(float left, float right, float top, 
float bottom, float near, float far) { - mat4x4 perspectiveMat4x4{emptyMatrix}; + mat4x4 perspectiveMat4x4 = mat4x4::make_empty(); perspectiveMat4x4.m00(2 / (right - left)); perspectiveMat4x4.m11(2 / (top - bottom));