2022-10-20 11:30:15 -04:00
|
|
|
/*
|
|
|
|
* Created by Brett Terpstra 6920201 on 14/10/22.
|
|
|
|
* Copyright (c) Brett Terpstra 2022 All Rights Reserved
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef STEP_2_VECTORS_H
|
|
|
|
#define STEP_2_VECTORS_H
|
|
|
|
|
|
|
|
// AVX512 isn't supported on my CPU. We will use AVX2 since it is supported by most modern CPUs
|
2022-10-23 23:46:12 -04:00
|
|
|
#include "config.h"
|
2022-11-13 02:15:12 -05:00
|
|
|
//#include <glm/glm.hpp>
|
|
|
|
//#include <glm/gtc/matrix_transform.hpp>
|
|
|
|
//#include <glm/gtc/type_ptr.hpp>
|
2022-10-20 18:02:48 -04:00
|
|
|
|
|
|
|
// I have tested this and when in release mode the O3 optimizations are capable of creating
|
|
|
|
// far better auto-vectorized results. See the table below for more info.
|
|
|
|
// In debug mode, however, the hand-written AVX instructions are far better. As they say, never try to out-optimize the compiler - you'll lose.
|
|
|
|
|
|
|
|
// in debug mode:
|
|
|
|
// multiplication
|
|
|
|
// 2174.43ms normal
|
|
|
|
// 1483.04ms avx
|
|
|
|
// division
|
|
|
|
// 2282.44ms normal
|
|
|
|
// 1627ms avx
|
|
|
|
// addition
|
|
|
|
// 2119.4ms normal
|
|
|
|
// 1495.77ms avx
|
|
|
|
// dot
|
|
|
|
// 1447.9ms normal
|
|
|
|
// 1088.5ms avx
|
|
|
|
// cross
|
|
|
|
// 2840.69ms normal
|
|
|
|
// 2543.66ms avx
|
|
|
|
|
|
|
|
// with release mode
|
|
|
|
// cross
|
|
|
|
// 244.144ms normal
|
|
|
|
// 283.516ms avx
|
|
|
|
// dot
|
|
|
|
// 239.759ms normal
|
|
|
|
// 385.583ms avx
|
|
|
|
// mul
|
|
|
|
// 70.9977ms normal
|
|
|
|
// 286.656ms avx
|
|
|
|
#ifdef COMPILER_DEBUG_ENABLED
|
|
|
|
#define USE_SIMD_CPU
|
|
|
|
#endif
|
2022-10-20 11:30:15 -04:00
|
|
|
|
|
|
|
#ifdef USE_SIMD_CPU
|
|
|
|
|
|
|
|
#include <immintrin.h>
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <cmath>
|
2022-10-23 23:46:12 -04:00
|
|
|
#include "engine/util/std.h"
|
2022-10-20 11:30:15 -04:00
|
|
|
|
|
|
|
namespace Raytracing {
|
|
|
|
|
|
|
|
// when running on the CPU it's fine to be a double
|
|
|
|
// Your CPU may be faster with floats.
|
|
|
|
// but if we move to the GPU it has to be a float.
|
|
|
|
// since GPUs generally are far more optimized for floats
|
|
|
|
// If using AVX or other SIMD instructions it should be double, only to fit into 256bits.
|
|
|
|
// TODO would be to add support for 128bit AVX vectors.
|
|
|
|
|
|
|
|
#ifdef USE_SIMD_CPU
|
|
|
|
// don't change this. (working on a float version)
|
|
|
|
typedef double PRECISION_TYPE;
|
|
|
|
|
|
|
|
/**
 * Overlays one 256-bit AVX register with its four double lanes.
 *
 * Assign a register to avxData and read the lanes back through _x, _y, _z and _w
 * (lowest lane first), or fill the lanes and read the register.
 * NOTE(review): anonymous structs and reading a union member other than the one last
 * written are compiler extensions rather than standard C++ - confirm for every
 * compiler this project supports.
 */
union AVXConvert {
    // the four lanes, lowest first
    struct {
        double _x;
        double _y;
        double _z;
        double _w;
    };
    // the same 32 bytes viewed as a packed register
    __m256d avxData;
};
|
|
|
|
|
|
|
|
class Vec4 {
|
|
|
|
private:
|
|
|
|
// makes it easy to convert between AVX and double data types.
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
PRECISION_TYPE _x{}, _y{}, _z{}, _w{};
|
|
|
|
//PRECISION_TYPE _w, _z, _y, _x;
|
|
|
|
};
|
|
|
|
__m256d avxData;
|
|
|
|
};
|
|
|
|
|
|
|
|
// finally a use for friend!
|
|
|
|
friend Vec4 operator+(const Vec4& left, const Vec4& right);
|
|
|
|
friend Vec4 operator-(const Vec4& left, const Vec4& right);
|
|
|
|
friend Vec4 operator*(const Vec4& left, const Vec4& right);
|
|
|
|
friend Vec4 operator/(const Vec4& left, const Vec4& right);
|
2022-10-20 18:02:48 -04:00
|
|
|
friend Vec4 operator*(PRECISION_TYPE c, const Vec4& v);
|
|
|
|
friend Vec4 operator*(const Vec4& v, PRECISION_TYPE c);
|
|
|
|
friend Vec4 operator/(const Vec4& v, PRECISION_TYPE c);
|
|
|
|
friend Vec4 operator/(PRECISION_TYPE c, const Vec4& v);
|
2022-10-20 11:30:15 -04:00
|
|
|
public:
|
|
|
|
|
|
|
|
Vec4(): avxData(_mm256_setzero_pd()) {}
|
2022-10-20 18:02:48 -04:00
|
|
|
explicit Vec4(const __m256d& data): avxData(data) {}
|
2022-10-20 11:30:15 -04:00
|
|
|
Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z): avxData(_mm256_setr_pd(x, y, z, 0.0)) {
|
|
|
|
//tlog << x << ":" << _x << " " << y << ":" << _y << " " << z << ":" << _z << "\n";
|
|
|
|
}
|
|
|
|
Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z, PRECISION_TYPE w): avxData(_mm256_setr_pd(x, y, z, w)) {
|
|
|
|
//dlog << x << ":" << _x << " " << y << ":" << _y << " " << z << ":" << _z << "\n";
|
|
|
|
}
|
|
|
|
Vec4(const Vec4& vec): avxData(_mm256_setr_pd(vec.x(), vec.y(), vec.z(), vec.w())) {
|
|
|
|
//ilog << vec.x() << ":" << _x << " " << vec.y() << ":" << _y << " " << vec.z() << ":" << _z << "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
// most of the modern c++ here is because clang tidy was annoying me
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE x() const { return _x; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE y() const { return _y; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE z() const { return _z; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE w() const { return _w; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE r() const { return _x; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE g() const { return _y; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE b() const { return _z; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE a() const { return _w; }
|
|
|
|
|
2022-10-20 18:02:48 -04:00
|
|
|
static inline __m256d getVecFromValue(PRECISION_TYPE c) {
|
|
|
|
return _mm256_set1_pd(c);
|
|
|
|
}
|
|
|
|
|
2022-10-20 11:30:15 -04:00
|
|
|
// negation operator
|
2022-10-20 18:02:48 -04:00
|
|
|
Vec4 operator-() const {
|
|
|
|
return Vec4{_mm256_mul_pd(getVecFromValue(-1), this->avxData)};
|
|
|
|
}
|
2022-10-20 11:30:15 -04:00
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE magnitude() const {
|
|
|
|
return sqrt(lengthSquared());
|
|
|
|
}
|
|
|
|
|
|
|
|
[[nodiscard]] inline PRECISION_TYPE lengthSquared() const {
|
2022-10-20 18:02:48 -04:00
|
|
|
return dot(*this, *this);
|
2022-10-20 11:30:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// returns the unit-vector.
|
|
|
|
[[nodiscard]] inline Vec4 normalize() const {
|
2022-10-20 18:02:48 -04:00
|
|
|
return Vec4{_mm256_div_pd(avxData, getVecFromValue(magnitude()))};
|
2022-10-20 11:30:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// add operator before the vec returns the magnitude
|
|
|
|
PRECISION_TYPE operator+() const {
|
|
|
|
return magnitude();
|
|
|
|
}
|
|
|
|
|
|
|
|
// preforms the dot product of left * right
|
|
|
|
static inline PRECISION_TYPE dot(const Vec4& left, const Vec4& right) {
|
|
|
|
// multiply the elements of the vectors
|
|
|
|
__m256d mul = _mm256_mul_pd(left.avxData, right.avxData);
|
|
|
|
// horizontal add. element 0 and 2 (or 1 and 3) contain the results which we must scalar add.
|
|
|
|
__m256d sum = _mm256_hadd_pd(mul, mul);
|
|
|
|
AVXConvert conv {};
|
|
|
|
conv.avxData = sum;
|
|
|
|
// boom! dot product. much easier than cross
|
|
|
|
return conv._x + conv._z;
|
|
|
|
}
|
|
|
|
|
|
|
|
// preforms the cross product of left X right
|
|
|
|
// since a general solution to the cross product doesn't exist in 4d
|
|
|
|
// we are going to ignore the w.
|
|
|
|
static inline Vec4 cross(const Vec4& left, const Vec4& right) {
|
|
|
|
// shuffle left values for alignment with the cross algorithm
|
|
|
|
// (read the shuffle selector from right to left) takes the y and places it in the first element of the resultant vector
|
|
|
|
// takes the z and places it in the second element of the vector
|
|
|
|
// takes the x element and places it in the 3rd element of the vector
|
|
|
|
// and then the w element in the last element of the vector
|
|
|
|
// creating the alignment {left.y(), left.z(), left.x(), left.w()} (as seen in the cross algorithm
|
|
|
|
__m256d leftLeftShuffle = _mm256_permute4x64_pd(left.avxData, _MM_SHUFFLE(3,0,2,1));
|
|
|
|
// same thing but produces {right.z(), right.x(), right.y(), right.w()}
|
|
|
|
__m256d rightLeftShuffle = _mm256_permute4x64_pd(right.avxData, _MM_SHUFFLE(3,1,0,2));
|
|
|
|
// now we have to do the right side multiplications
|
|
|
|
// {left.z(), left.x(), left.y(), left.w()}
|
|
|
|
__m256d leftRightShuffle = _mm256_permute4x64_pd(left.avxData, _MM_SHUFFLE(3,1,0,2));
|
|
|
|
// {right.y(), right.z(), right.x(), right.w()}
|
|
|
|
__m256d rightRightShuffle = _mm256_permute4x64_pd(right.avxData, _MM_SHUFFLE(3,0,2,1));
|
|
|
|
// multiply to do the first step of the cross process
|
|
|
|
__m256d multiLeft = _mm256_mul_pd(leftLeftShuffle, rightLeftShuffle);
|
|
|
|
// multiply the right sides of the subtraction sign
|
|
|
|
__m256d multiRight = _mm256_mul_pd(leftRightShuffle, rightRightShuffle);
|
|
|
|
// then subtract to produce the cross product
|
|
|
|
__m256d subs = _mm256_sub_pd(multiLeft, multiRight);
|
2022-10-20 18:02:48 -04:00
|
|
|
|
2022-10-20 11:30:15 -04:00
|
|
|
// yes this looks a lot more complicated, but it should be faster!
|
|
|
|
/*auto b = Vec4{left.y() * right.z() - left.z() * right.y(),
|
|
|
|
left.z() * right.x() - left.x() * right.z(),
|
|
|
|
left.x() * right.y() - left.y() * right.x()};
|
|
|
|
tlog << b._x << " " << b._y << " " << b._z << "\n";
|
|
|
|
tlog << conv._x << " " << conv._y << " " << conv._z << "\n\n";*/
|
2022-10-20 18:02:48 -04:00
|
|
|
return Vec4{subs};
|
2022-10-20 11:30:15 -04:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// adds the two vectors left and right
|
|
|
|
inline Vec4 operator+(const Vec4& left, const Vec4& right) {
|
2022-10-20 18:02:48 -04:00
|
|
|
return Vec4{_mm256_add_pd(left.avxData, right.avxData)};
|
2022-10-20 11:30:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// subtracts the right vector from the left.
|
|
|
|
inline Vec4 operator-(const Vec4& left, const Vec4& right) {
|
2022-10-20 18:02:48 -04:00
|
|
|
return Vec4{_mm256_sub_pd(left.avxData, right.avxData)};
|
2022-10-20 11:30:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// multiples the left with the right
|
|
|
|
inline Vec4 operator*(const Vec4& left, const Vec4& right) {
|
|
|
|
//dlog << left._x << " " << left._y << " " << left._z << " " << left._w << "\n";
|
|
|
|
//dlog << right._x << " " << right._y << " " << right._z << " " << right._w << "\n";
|
|
|
|
//dlog << conv._x << " " << conv._y << " " << conv._z << " " << conv._w << "\n\n";
|
2022-10-20 18:02:48 -04:00
|
|
|
return Vec4{_mm256_mul_pd(left.avxData, right.avxData)};
|
2022-10-20 11:30:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// divides each element individually
|
|
|
|
inline Vec4 operator/(const Vec4& left, const Vec4& right) {
|
2022-10-20 18:02:48 -04:00
|
|
|
return Vec4{_mm256_div_pd(left.avxData, right.avxData)};
|
|
|
|
}
|
|
|
|
|
|
|
|
// multiplies the const c with each element in the vector v
|
|
|
|
inline Vec4 operator*(PRECISION_TYPE c, const Vec4& v) {
|
|
|
|
return Vec4{_mm256_mul_pd(Vec4::getVecFromValue(c), v.avxData)};
|
|
|
|
}
|
|
|
|
|
|
|
|
// same as above but for right sided constants
|
|
|
|
inline Vec4 operator*(const Vec4& v, PRECISION_TYPE c) {
|
|
|
|
return Vec4{_mm256_mul_pd(v.avxData, Vec4::getVecFromValue(c))};
|
|
|
|
}
|
|
|
|
|
|
|
|
// divides the vector by the constant c
|
|
|
|
inline Vec4 operator/(const Vec4& v, PRECISION_TYPE c) {
|
|
|
|
return Vec4{_mm256_div_pd(v.avxData, Vec4::getVecFromValue(c))};
|
|
|
|
}
|
|
|
|
|
|
|
|
// divides each element in the vector by over the constant
|
|
|
|
inline Vec4 operator/(PRECISION_TYPE c, const Vec4& v) {
|
|
|
|
return Vec4{_mm256_div_pd(Vec4::getVecFromValue(c), v.avxData)};
|
2022-10-20 11:30:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
// change this if you want
using PRECISION_TYPE = double;

/**
 * Four-component vector of PRECISION_TYPE (scalar, non-SIMD implementation).
 *
 * Components are x, y, z, w and can also be read as r, g, b, a. The three-argument
 * constructor sets w to 0. Copying and assignment use the compiler-generated
 * members, which keeps the type trivially copyable.
 */
class Vec4 {
    private:
        // isn't much of a reason to keep a w around,
        // it's unlikely that we'll need to use the w component
        // but it helps better line up with the GPU and other SIMD type instructions.
        // Plain members are used so the r/g/b/a aliases never read an inactive union member.
        PRECISION_TYPE _x, _y, _z, _w;
    public:
        // The zero vector.
        constexpr Vec4(): _x{0}, _y{0}, _z{0}, _w{0} {}

        // (x, y, z) with w = 0.
        constexpr Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z): _x{x}, _y{y}, _z{z}, _w{0} {}

        // (x, y, z, w).
        constexpr Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z, PRECISION_TYPE w): _x{x}, _y{y}, _z{z}, _w{w} {}

        // most of the modern c++ here is because clang tidy was annoying me
        [[nodiscard]] constexpr PRECISION_TYPE x() const { return _x; }

        [[nodiscard]] constexpr PRECISION_TYPE y() const { return _y; }

        [[nodiscard]] constexpr PRECISION_TYPE z() const { return _z; }

        [[nodiscard]] constexpr PRECISION_TYPE w() const { return _w; }

        // colour aliases of x, y, z, w
        [[nodiscard]] constexpr PRECISION_TYPE r() const { return _x; }

        [[nodiscard]] constexpr PRECISION_TYPE g() const { return _y; }

        [[nodiscard]] constexpr PRECISION_TYPE b() const { return _z; }

        [[nodiscard]] constexpr PRECISION_TYPE a() const { return _w; }

        // negation operator: negates all four components.
        constexpr Vec4 operator-() const { return {-x(), -y(), -z(), -w()}; }

        // Square root of lengthSquared(), so w contributes to the length.
        [[nodiscard]] inline PRECISION_TYPE magnitude() const {
            return std::sqrt(lengthSquared());
        }

        // x*x + y*y + z*z + w*w. Unlike dot(), this includes w.
        [[nodiscard]] constexpr PRECISION_TYPE lengthSquared() const {
            return x() * x() + y() * y() + z() * z() + w() * w();
        }

        // returns the unit-vector: every component divided by magnitude().
        // A zero-length vector divides by zero; no guard is applied.
        [[nodiscard]] inline Vec4 normalize() const {
            const PRECISION_TYPE mag = magnitude();
            return {x() / mag, y() / mag, z() / mag, w() / mag};
        }

        // add operator before the vec returns the magnitude
        PRECISION_TYPE operator+() const {
            return magnitude();
        }

        // performs the dot product of left * right over x, y and z only.
        // NOTE(review): w is left out here, while lengthSquared() (and the AVX build's
        // dot) include it, so dot(v, v) != v.lengthSquared() whenever w != 0.
        static constexpr PRECISION_TYPE dot(const Vec4& left, const Vec4& right) {
            return left.x() * right.x()
                   + left.y() * right.y()
                   + left.z() * right.z();
        }

        // performs the cross product of left X right
        // since a general solution to the cross product doesn't exist in 4d
        // we are going to ignore the w; the result always has w = 0.
        static constexpr Vec4 cross(const Vec4& left, const Vec4& right) {
            return {left.y() * right.z() - left.z() * right.y(),
                    left.z() * right.x() - left.x() * right.z(),
                    left.x() * right.y() - left.y() * right.x()};
        }
};
|
|
|
|
|
|
|
|
// Utility Functions
|
2022-10-23 23:46:12 -04:00
|
|
|
|
2022-10-20 11:30:15 -04:00
|
|
|
// adds the two vectors left and right
|
|
|
|
inline Vec4 operator+(const Vec4& left, const Vec4& right) {
|
|
|
|
return {left.x() + right.x(), left.y() + right.y(), left.z() + right.z(), left.w() + right.w()};
|
|
|
|
}
|
2022-10-23 23:46:12 -04:00
|
|
|
|
2022-10-20 11:30:15 -04:00
|
|
|
// subtracts the right vector from the left.
|
|
|
|
inline Vec4 operator-(const Vec4& left, const Vec4& right) {
|
|
|
|
return {left.x() - right.x(), left.y() - right.y(), left.z() - right.z(), left.w() - right.w()};
|
|
|
|
}
|
2022-10-23 23:46:12 -04:00
|
|
|
|
2022-10-20 11:30:15 -04:00
|
|
|
// multiples the left with the right
|
|
|
|
inline Vec4 operator*(const Vec4& left, const Vec4& right) {
|
|
|
|
return {left.x() * right.x(), left.y() * right.y(), left.z() * right.z(), left.w() * right.w()};
|
|
|
|
}
|
2022-10-23 23:46:12 -04:00
|
|
|
|
2022-10-20 11:30:15 -04:00
|
|
|
// divides each element individually
|
|
|
|
inline Vec4 operator/(const Vec4& left, const Vec4& right) {
|
|
|
|
return {left.x() / right.x(), left.y() / right.y(), left.z() / right.z(), left.w() / right.w()};
|
|
|
|
}
|
|
|
|
|
|
|
|
// multiplies the const c with each element in the vector v
|
|
|
|
inline Vec4 operator*(const PRECISION_TYPE c, const Vec4& v) {
|
|
|
|
return {c * v.x(), c * v.y(), c * v.z(), c * v.w()};
|
|
|
|
}
|
|
|
|
|
|
|
|
// same as above but for right sided constants
|
|
|
|
inline Vec4 operator*(const Vec4& v, PRECISION_TYPE c) {
|
|
|
|
return c * v;
|
|
|
|
}
|
|
|
|
|
|
|
|
// divides the vector by the constant c
|
|
|
|
inline Vec4 operator/(const Vec4& v, PRECISION_TYPE c) {
|
|
|
|
return {v.x() / c, v.y() / c, v.z() / c, v.w() / c};
|
|
|
|
}
|
|
|
|
|
|
|
|
// divides each element in the vector by over the constant
|
|
|
|
inline Vec4 operator/(PRECISION_TYPE c, const Vec4& v) {
|
|
|
|
return {c / v.x(), c / v.y(), c / v.z(), c / v.w()};
|
|
|
|
}
|
|
|
|
|
2022-10-20 18:02:48 -04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
// none of these can be vectorized with AVX instructions
|
|
|
|
|
|
|
|
// useful for printing out the vector to stdout
|
|
|
|
inline std::ostream& operator<<(std::ostream& out, const Vec4& v) {
|
|
|
|
return out << "Vec4{" << v.x() << ", " << v.y() << ", " << v.z() << ", " << v.w() << "} ";
|
|
|
|
}
|
|
|
|
|
2022-10-20 11:30:15 -04:00
|
|
|
class Ray {
|
|
|
|
private:
|
|
|
|
// the starting point for our ray
|
|
|
|
Vec4 start;
|
|
|
|
// and the direction it is currently traveling
|
|
|
|
Vec4 direction;
|
|
|
|
Vec4 inverseDirection;
|
|
|
|
public:
|
|
|
|
Ray(const Vec4& start, const Vec4& direction): start(start), direction(direction), inverseDirection(1 / direction) {}
|
|
|
|
|
|
|
|
[[nodiscard]] Vec4 getStartingPoint() const { return start; }
|
|
|
|
|
|
|
|
[[nodiscard]] Vec4 getDirection() const { return direction; }
|
|
|
|
|
|
|
|
// not always needed, but it's good to not have to calculate the inverse inside the intersection
|
|
|
|
// as that would be very every AABB, and that is expensive
|
|
|
|
[[nodiscard]] Vec4 getInverseDirection() const { return inverseDirection; }
|
|
|
|
|
|
|
|
// returns a point along the ray, extended away from start by the length.
|
|
|
|
[[nodiscard]] inline Vec4 along(PRECISION_TYPE length) const { return start + length * direction; }
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
inline std::ostream& operator<<(std::ostream& out, const Ray& v) {
|
|
|
|
return out << "Ray{" << v.getStartingPoint() << " " << v.getDirection() << "} ";
|
|
|
|
}
|
2022-10-23 23:46:12 -04:00
|
|
|
|
|
|
|
#ifdef USE_SIMD_CPU
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2022-10-24 00:40:31 -04:00
|
|
|
// only float supported because GPUs don't like doubles
|
|
|
|
// well they do but there isn't much of a reason to use them since this is for opengl
|
2022-10-23 23:46:12 -04:00
|
|
|
class Mat4x4 {
|
|
|
|
protected:
|
|
|
|
// 4x4 = 16
|
2022-11-07 00:29:12 -05:00
|
|
|
union dataType {
|
|
|
|
float single[16];
|
|
|
|
float dim[4][4];
|
|
|
|
};
|
|
|
|
dataType data {};
|
2022-10-23 23:46:12 -04:00
|
|
|
friend Mat4x4 operator+(const Mat4x4& left, const Mat4x4& right);
|
|
|
|
friend Mat4x4 operator-(const Mat4x4& left, const Mat4x4& right);
|
|
|
|
friend Mat4x4 operator*(const Mat4x4& left, const Mat4x4& right);
|
2022-10-24 00:40:31 -04:00
|
|
|
friend Mat4x4 operator*(float c, const Mat4x4& v);
|
|
|
|
friend Mat4x4 operator*(const Mat4x4& v, float c);
|
|
|
|
friend Mat4x4 operator/(const Mat4x4& v, float c);
|
|
|
|
friend Mat4x4 operator/(float c, const Mat4x4& v);
|
2022-10-23 23:46:12 -04:00
|
|
|
public:
|
|
|
|
Mat4x4() {
|
2022-11-07 00:29:12 -05:00
|
|
|
for (float & i : data.single) {
|
2022-10-23 23:46:12 -04:00
|
|
|
i = 0;
|
|
|
|
}
|
|
|
|
// set identity matrix default
|
|
|
|
m00(1);
|
|
|
|
m11(1);
|
|
|
|
m22(1);
|
|
|
|
m33(1);
|
|
|
|
}
|
2022-11-13 02:15:12 -05:00
|
|
|
/*explicit Mat4x4(glm::mat4x4 mat) {
|
2022-11-07 00:29:12 -05:00
|
|
|
m00(mat[0][0]);
|
|
|
|
m01(mat[1][0]);
|
|
|
|
m02(mat[2][0]);
|
|
|
|
m03(mat[3][0]);
|
|
|
|
|
|
|
|
m10(mat[0][1]);
|
|
|
|
m11(mat[1][1]);
|
|
|
|
m12(mat[2][1]);
|
|
|
|
m13(mat[3][1]);
|
|
|
|
|
|
|
|
m20(mat[0][2]);
|
|
|
|
m21(mat[1][2]);
|
|
|
|
m22(mat[2][2]);
|
|
|
|
m23(mat[3][2]);
|
|
|
|
|
|
|
|
m30(mat[0][3]);
|
|
|
|
m31(mat[1][3]);
|
|
|
|
m32(mat[2][3]);
|
|
|
|
m33(mat[3][3]);
|
2022-11-13 02:15:12 -05:00
|
|
|
}*/
|
2022-10-23 23:46:12 -04:00
|
|
|
Mat4x4(const Mat4x4& mat) {
|
2022-11-07 00:29:12 -05:00
|
|
|
for (int i = 0; i < 16; i++) {
|
|
|
|
data.single[i] = mat.data.single[i];
|
|
|
|
}
|
2022-10-23 23:46:12 -04:00
|
|
|
}
|
2022-10-24 00:40:31 -04:00
|
|
|
explicit Mat4x4(const float dat[16]) {
|
2022-11-07 00:29:12 -05:00
|
|
|
for (int i = 0; i < 16; i++) {
|
|
|
|
data.single[i] = dat[i];
|
|
|
|
}
|
2022-10-23 23:46:12 -04:00
|
|
|
}
|
|
|
|
|
2022-10-24 00:40:31 -04:00
|
|
|
inline Mat4x4& translate(float x, float y, float z) {
|
2022-10-23 23:46:12 -04:00
|
|
|
m03(x);
|
|
|
|
m13(y);
|
|
|
|
m23(z);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline Mat4x4& translate(const Vec4& vec) {
|
2022-10-31 00:51:51 -04:00
|
|
|
m03(float(vec.x()));
|
|
|
|
m13(float(vec.y()));
|
|
|
|
m23(float(vec.z()));
|
2022-10-23 23:46:12 -04:00
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2022-10-24 00:40:31 -04:00
|
|
|
inline Mat4x4& scale(float x, float y, float z) {
|
2022-10-23 23:46:12 -04:00
|
|
|
m00(x);
|
|
|
|
m11(y);
|
|
|
|
m22(z);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline Mat4x4& scale(const Vec4& vec) {
|
2022-10-31 00:51:51 -04:00
|
|
|
m00(float(vec.x()));
|
|
|
|
m11(float(vec.y()));
|
|
|
|
m22(float(vec.z()));
|
2022-10-23 23:46:12 -04:00
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2022-11-07 00:29:12 -05:00
|
|
|
float* ptr() {
|
|
|
|
return data.single;
|
2022-10-24 00:40:31 -04:00
|
|
|
}
|
|
|
|
|
2022-11-07 00:29:12 -05:00
|
|
|
Mat4x4& transpose() {
|
|
|
|
Mat4x4 copy {*this};
|
|
|
|
|
|
|
|
m00(copy.m00());
|
|
|
|
m01(copy.m10());
|
|
|
|
m02(copy.m20());
|
|
|
|
m03(copy.m30());
|
|
|
|
|
|
|
|
m10(copy.m01());
|
|
|
|
m11(copy.m11());
|
|
|
|
m12(copy.m21());
|
|
|
|
m13(copy.m31());
|
|
|
|
|
|
|
|
m20(copy.m02());
|
|
|
|
m21(copy.m12());
|
|
|
|
m22(copy.m22());
|
|
|
|
m23(copy.m32());
|
|
|
|
|
|
|
|
m30(copy.m03());
|
|
|
|
m31(copy.m13());
|
|
|
|
m32(copy.m23());
|
|
|
|
m33(copy.m33());
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Due to the conversion between the 2d array -> 1d array we must transpose the values.
|
|
|
|
// the old system has been archived (commented) for future debugging
|
|
|
|
// [[nodiscard]] inline float m00() const { return data.dim[0][0]; }
|
|
|
|
// [[nodiscard]] inline float m10() const { return data.dim[1][0]; }
|
|
|
|
// [[nodiscard]] inline float m20() const { return data.dim[2][0]; }
|
|
|
|
// [[nodiscard]] inline float m30() const { return data.dim[3][0]; }
|
|
|
|
// [[nodiscard]] inline float m01() const { return data.dim[0][1]; }
|
|
|
|
// [[nodiscard]] inline float m11() const { return data.dim[1][1]; }
|
|
|
|
// [[nodiscard]] inline float m21() const { return data.dim[2][1]; }
|
|
|
|
// [[nodiscard]] inline float m31() const { return data.dim[3][1]; }
|
|
|
|
// [[nodiscard]] inline float m02() const { return data.dim[0][2]; }
|
|
|
|
// [[nodiscard]] inline float m12() const { return data.dim[1][2]; }
|
|
|
|
// [[nodiscard]] inline float m22() const { return data.dim[2][2]; }
|
|
|
|
// [[nodiscard]] inline float m32() const { return data.dim[3][2]; }
|
|
|
|
// [[nodiscard]] inline float m03() const { return data.dim[0][3]; }
|
|
|
|
// [[nodiscard]] inline float m13() const { return data.dim[1][3]; }
|
|
|
|
// [[nodiscard]] inline float m23() const { return data.dim[2][3]; }
|
|
|
|
// [[nodiscard]] inline float m33() const { return data.dim[3][3]; }
|
|
|
|
// [[nodiscard]] inline float m(int i, int j) const { return data.dim[i][j]; };
|
|
|
|
// inline float m00(float d) { return data.dim[0][0] = d; }
|
|
|
|
// inline float m10(float d) { return data.dim[1][0] = d; }
|
|
|
|
// inline float m20(float d) { return data.dim[2][0] = d; }
|
|
|
|
// inline float m30(float d) { return data.dim[3][0] = d; }
|
|
|
|
// inline float m01(float d) { return data.dim[0][1] = d; }
|
|
|
|
// inline float m11(float d) { return data.dim[1][1] = d; }
|
|
|
|
// inline float m21(float d) { return data.dim[2][1] = d; }
|
|
|
|
// inline float m31(float d) { return data.dim[3][1] = d; }
|
|
|
|
// inline float m02(float d) { return data.dim[0][2] = d; }
|
|
|
|
// inline float m12(float d) { return data.dim[1][2] = d; }
|
|
|
|
// inline float m22(float d) { return data.dim[2][2] = d; }
|
|
|
|
// inline float m32(float d) { return data.dim[3][2] = d; }
|
|
|
|
// inline float m03(float d) { return data.dim[0][3] = d; }
|
|
|
|
// inline float m13(float d) { return data.dim[1][3] = d; }
|
|
|
|
// inline float m23(float d) { return data.dim[2][3] = d; }
|
|
|
|
// inline float m33(float d) { return data.dim[3][3] = d; }
|
|
|
|
|
|
|
|
[[nodiscard]] inline float m00() const { return data.dim[0][0]; }
|
|
|
|
[[nodiscard]] inline float m10() const { return data.dim[0][1]; }
|
|
|
|
[[nodiscard]] inline float m20() const { return data.dim[0][2]; }
|
|
|
|
[[nodiscard]] inline float m30() const { return data.dim[0][3]; }
|
|
|
|
[[nodiscard]] inline float m01() const { return data.dim[1][0]; }
|
|
|
|
[[nodiscard]] inline float m11() const { return data.dim[1][1]; }
|
|
|
|
[[nodiscard]] inline float m21() const { return data.dim[1][2]; }
|
|
|
|
[[nodiscard]] inline float m31() const { return data.dim[1][3]; }
|
|
|
|
[[nodiscard]] inline float m02() const { return data.dim[2][0]; }
|
|
|
|
[[nodiscard]] inline float m12() const { return data.dim[2][1]; }
|
|
|
|
[[nodiscard]] inline float m22() const { return data.dim[2][2]; }
|
|
|
|
[[nodiscard]] inline float m32() const { return data.dim[2][3]; }
|
|
|
|
[[nodiscard]] inline float m03() const { return data.dim[3][0]; }
|
|
|
|
[[nodiscard]] inline float m13() const { return data.dim[3][1]; }
|
|
|
|
[[nodiscard]] inline float m23() const { return data.dim[3][2]; }
|
|
|
|
[[nodiscard]] inline float m33() const { return data.dim[3][3]; }
|
|
|
|
[[nodiscard]] inline float m(int i, int j) const { return data.dim[i][j]; };
|
|
|
|
inline float m00(float d) { return data.dim[0][0] = d; }
|
|
|
|
inline float m10(float d) { return data.dim[0][1] = d; }
|
|
|
|
inline float m20(float d) { return data.dim[0][2] = d; }
|
|
|
|
inline float m30(float d) { return data.dim[0][3] = d; }
|
|
|
|
inline float m01(float d) { return data.dim[1][0] = d; }
|
|
|
|
inline float m11(float d) { return data.dim[1][1] = d; }
|
|
|
|
inline float m21(float d) { return data.dim[1][2] = d; }
|
|
|
|
inline float m31(float d) { return data.dim[1][3] = d; }
|
|
|
|
inline float m02(float d) { return data.dim[2][0] = d; }
|
|
|
|
inline float m12(float d) { return data.dim[2][1] = d; }
|
|
|
|
inline float m22(float d) { return data.dim[2][2] = d; }
|
|
|
|
inline float m32(float d) { return data.dim[2][3] = d; }
|
|
|
|
inline float m03(float d) { return data.dim[3][0] = d; }
|
|
|
|
inline float m13(float d) { return data.dim[3][1] = d; }
|
|
|
|
inline float m23(float d) { return data.dim[3][2] = d; }
|
|
|
|
inline float m33(float d) { return data.dim[3][3] = d; }
|
|
|
|
inline float m(int i, int j, float d) { return data.dim[i][j] = d; };
|
2022-10-23 23:46:12 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
// adds the two Mat4x4 left and right
|
|
|
|
inline Mat4x4 operator+(const Mat4x4& left, const Mat4x4& right) {
|
2022-10-24 00:40:31 -04:00
|
|
|
float data[16];
|
2022-10-23 23:46:12 -04:00
|
|
|
for (int i = 0; i < 16; i++)
|
2022-11-07 00:29:12 -05:00
|
|
|
data[i] = left.data.single[i] + right.data.single[i];
|
2022-10-23 23:46:12 -04:00
|
|
|
return Mat4x4{data};
|
|
|
|
}
|
|
|
|
|
|
|
|
// subtracts the right Mat4x4 from the left.
|
|
|
|
inline Mat4x4 operator-(const Mat4x4& left, const Mat4x4& right) {
|
2022-10-24 00:40:31 -04:00
|
|
|
float data[16];
|
2022-10-23 23:46:12 -04:00
|
|
|
for (int i = 0; i < 16; i++)
|
2022-11-07 00:29:12 -05:00
|
|
|
data[i] = left.data.single[i] - right.data.single[i];
|
2022-10-23 23:46:12 -04:00
|
|
|
return Mat4x4{data};
|
|
|
|
}
|
|
|
|
|
2022-11-07 00:29:12 -05:00
|
|
|
// since matrices are made identity by default, we need a way to create a result collector
// matrix without identity, otherwise the diagonal will be 1 off and cause weird results
// (see black screen issue). Pass this to the float-array constructor to start from all zeros.
// Declared inline so every translation unit including this header shares one array
// instead of each getting a private copy.
inline constexpr float emptyMatrix[16] = {0, 0, 0, 0,
                                          0, 0, 0, 0,
                                          0, 0, 0, 0,
                                          0, 0, 0, 0};
|
|
|
|
|
2022-10-23 23:46:12 -04:00
|
|
|
// multiples the left with the right
|
|
|
|
inline Mat4x4 operator*(const Mat4x4& left, const Mat4x4& right) {
|
2022-11-07 00:29:12 -05:00
|
|
|
Mat4x4 mat{emptyMatrix};
|
2022-10-23 23:46:12 -04:00
|
|
|
|
|
|
|
// TODO: check avx with this??
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
|
|
for (int j = 0; j < 4; j++) {
|
|
|
|
for (int k = 0; k < 4; k++) {
|
|
|
|
mat.m(i, j, mat.m(i, j) + left.m(i, k) * right.m(k, j));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return mat;
|
|
|
|
}
|
|
|
|
|
|
|
|
// multiplies the const c with each element in the Mat4x4 v
|
2022-10-24 00:40:31 -04:00
|
|
|
inline Mat4x4 operator*(float c, const Mat4x4& v) {
|
2022-10-23 23:46:12 -04:00
|
|
|
Mat4x4 mat{};
|
|
|
|
|
|
|
|
for (int i = 0; i < 16; i++) {
|
2022-11-07 00:29:12 -05:00
|
|
|
mat.data.single[i] = c * v.data.single[i];
|
2022-10-23 23:46:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return mat;
|
|
|
|
}
|
|
|
|
|
|
|
|
// same as above but for right sided constants
|
2022-10-24 00:40:31 -04:00
|
|
|
inline Mat4x4 operator*(const Mat4x4& v, float c) {
|
2022-10-23 23:46:12 -04:00
|
|
|
Mat4x4 mat{};
|
|
|
|
|
|
|
|
for (int i = 0; i < 16; i++) {
|
2022-11-07 00:29:12 -05:00
|
|
|
mat.data.single[i] = v.data.single[i] * c;
|
2022-10-23 23:46:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return mat;
|
|
|
|
}
|
|
|
|
|
|
|
|
// divides the Mat4x4 by the constant c
|
2022-10-24 00:40:31 -04:00
|
|
|
inline Mat4x4 operator/(const Mat4x4& v, float c) {
|
2022-10-23 23:46:12 -04:00
|
|
|
Mat4x4 mat{};
|
|
|
|
|
|
|
|
for (int i = 0; i < 16; i++) {
|
2022-11-07 00:29:12 -05:00
|
|
|
mat.data.single[i] = v.data.single[i] / c;
|
2022-10-23 23:46:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return mat;
|
|
|
|
}
|
|
|
|
|
|
|
|
// divides each element in the Mat4x4 by over the constant
|
2022-10-24 00:40:31 -04:00
|
|
|
inline Mat4x4 operator/(float c, const Mat4x4& v) {
|
2022-10-23 23:46:12 -04:00
|
|
|
Mat4x4 mat{};
|
|
|
|
|
|
|
|
for (int i = 0; i < 16; i++) {
|
2022-11-07 00:29:12 -05:00
|
|
|
mat.data.single[i] = c / v.data.single[i];
|
2022-10-23 23:46:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return mat;
|
|
|
|
}
|
2022-10-20 11:30:15 -04:00
|
|
|
|
2022-11-07 00:29:12 -05:00
|
|
|
inline std::ostream& operator<<(std::ostream& out, const Mat4x4& v) {
|
|
|
|
return out << "\rMatrix4x4{" << v.m00() << ", " << v.m01() << ", " << v.m02() << ", " << v.m03() << "} \n"\
|
|
|
|
<< " {" << v.m10() << ", " << v.m11() << ", " << v.m12() << ", " << v.m13() << "} \n"\
|
|
|
|
<< " {" << v.m20() << ", " << v.m21() << ", " << v.m22() << ", " << v.m23() << "} \n"\
|
|
|
|
<< " {" << v.m30() << ", " << v.m31() << ", " << v.m32() << ", " << v.m33() << "} \n";
|
|
|
|
}
|
|
|
|
|
2022-10-23 23:46:12 -04:00
|
|
|
};
|
2022-10-20 11:30:15 -04:00
|
|
|
|
|
|
|
#endif //STEP_2_VECTORS_H
|