COSC-3P93-Project/Step 2/Submission/include/math/vectors.h

/*
 * Created by Brett Terpstra 6920201 on 14/10/22.
 * Copyright (c) Brett Terpstra 2022 All Rights Reserved
 */

#ifndef STEP_2_VECTORS_H
#define STEP_2_VECTORS_H

// AVX512 isn't supported on my CPU. We will use AVX2 since it is supported by most modern CPUs
// THIS IS CURRENTLY BROKEN. DO NOT USE. it's a great idea, but I've run out of time to debug this. Will be in step 3
//#define USE_SIMD_CPU

#ifdef USE_SIMD_CPU

    #include <immintrin.h>

#endif

#include <cmath>
#include "util/std.h"

namespace Raytracing {

    // when running on the CPU it's fine to be a double
    // Your CPU may be faster with floats.
    // but if we move to the GPU it has to be a float.
    // since GPUs generally are far more optimized for floats
    // If using AVX or other SIMD instructions it should be double, only to fit into 256bits.
    // TODO would be to add support for 128bit AVX vectors.

    #ifdef USE_SIMD_CPU
    // don't change this. (working on a float version)
    typedef double PRECISION_TYPE;

    union AVXConvert {
        struct {
            double _x, _y, _z, _w;
        };
        __m256d avxData;
    };

    class Vec4 {
        private:
            // makes it easy to convert between AVX and double data types.
            union {
                struct {
                    double _x, _y, _z, _w;
                };
                __m256d avxData;
            };

            // finally a use for friend!
            friend Vec4 operator+(const Vec4& left, const Vec4& right);
            friend Vec4 operator-(const Vec4& left, const Vec4& right);
            friend Vec4 operator*(const Vec4& left, const Vec4& right);
            friend Vec4 operator/(const Vec4& left, const Vec4& right);
        public:

            Vec4(): avxData{_mm256_set_pd(0.0, 0.0, 0.0, 0.0)} {}
            Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z): avxData{_mm256_set_pd(x, y, z, 0)} {}
            Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z, PRECISION_TYPE w): avxData{_mm256_set_pd(x, y, z, w)} {}
            Vec4(const Vec4& vec): avxData{_mm256_set_pd(vec.x(), vec.y(), vec.z(), vec.w())} {}

            // most of the modern c++ here is because clang tidy was annoying me
            [[nodiscard]] inline PRECISION_TYPE x() const { return _x; }

            [[nodiscard]] inline PRECISION_TYPE y() const { return _y; }

            [[nodiscard]] inline PRECISION_TYPE z() const { return _z; }

            [[nodiscard]] inline PRECISION_TYPE w() const { return _w; }

            [[nodiscard]] inline PRECISION_TYPE r() const { return _x; }

            [[nodiscard]] inline PRECISION_TYPE g() const { return _y; }

            [[nodiscard]] inline PRECISION_TYPE b() const { return _z; }

            [[nodiscard]] inline PRECISION_TYPE a() const { return _w; }

            // negation operator
            Vec4 operator-() const { return {-x(), -y(), -z(), -w()}; }

            [[nodiscard]] inline PRECISION_TYPE magnitude() const {
                return sqrt(lengthSquared());
            }

            [[nodiscard]] inline PRECISION_TYPE lengthSquared() const {
                return Vec4::dot(*this, *this);
            }

            // returns the unit-vector.
            [[nodiscard]] inline Vec4 normalize() const {
                PRECISION_TYPE mag = magnitude();
                return {x() / mag, y() / mag, z() / mag, w() / mag};
            }

            // add operator before the vec returns the magnitude
            PRECISION_TYPE operator+() const {
                return magnitude();
            }

            // preforms the dot product of left * right
            static inline PRECISION_TYPE dot(const Vec4& left, const Vec4& right) {
                // multiply the elements of the vectors
                __m256d mul = _mm256_mul_pd(left.avxData, right.avxData);
                // horizontal add. element 0 and 2 (or 1 and 3) contain the results which we must scalar add.
                __m256d sum = _mm256_hadd_pd(mul, mul);
                AVXConvert conv {};
                conv.avxData = sum;
                // boom! dot product. much easier than cross
                return conv._x + conv._y;
            }

            // preforms the cross product of left X right
            // since a general solution to the cross product doesn't exist in 4d
            // we are going to ignore the w.
            static inline Vec4 cross(const Vec4& left, const Vec4& right) {
                // shuffle left values for alignment with the cross algorithm
                // (read the shuffle selector from right to left) takes the y and places it in the first element of the resultant vector
                // takes the z and places it in the second element of the vector
                // takes the x element and places it in the 3rd element of the vector
                // and then the w element in the last element of the vector
                // creating the alignment {left.y(), left.z(), left.x(), left.w()} (as seen in the cross algorithm
                __m256d leftLeftShuffle = _mm256_shuffle_pd(left.avxData, left.avxData, _MM_SHUFFLE(3,0,2,1));
                // same thing but produces {right.z(), right.x(), right.y(), right.w()}
                __m256d rightLeftShuffle = _mm256_shuffle_pd(right.avxData, right.avxData, _MM_SHUFFLE(3,1,0,2));
                // multiply to do the first step of the cross process
                __m256d multiLeft = _mm256_mul_pd(leftLeftShuffle, rightLeftShuffle);
                // now we have to do the right side multiplications
                // {left.z(), left.x(), left.y(), left.w()}
                __m256d leftRightShuffle = _mm256_shuffle_pd(left.avxData, left.avxData, _MM_SHUFFLE(3,1,0,2));
                // {right.y(), right.z(), right.x(), right.w()}
                __m256d rightRightShuffle = _mm256_shuffle_pd(right.avxData, right.avxData, _MM_SHUFFLE(3,0,2,1));
                // multiply the right sides of the subtraction sign
                __m256d multiRight = _mm256_mul_pd(leftRightShuffle, rightRightShuffle);
                // then subtract to produce the cross product
                __m256d subs = _mm256_sub_pd(multiLeft, multiRight);
                // yes this looks a lot more complicated, but it should be faster!
                AVXConvert conv {};
                conv.avxData = subs;
                return {conv._x, conv._y, conv._z, conv._w};
            }
    };

    // adds the two vectors left and right
    inline Vec4 operator+(const Vec4& left, const Vec4& right) {
        __m256d added = _mm256_add_pd(left.avxData, right.avxData);
        AVXConvert conv {};
        conv.avxData = added;
        return {conv._x, conv._y, conv._z, conv._w};
    }

    // subtracts the right vector from the left.
    inline Vec4 operator-(const Vec4& left, const Vec4& right) {
        __m256d subbed = _mm256_sub_pd(left.avxData, right.avxData);
        AVXConvert conv {};
        conv.avxData = subbed;
        return {conv._x, conv._y, conv._z, conv._w};
    }

    // multiples the left with the right
    inline Vec4 operator*(const Vec4& left, const Vec4& right) {
        __m256d multiplied = _mm256_mul_pd(left.avxData, right.avxData);
        AVXConvert conv {};
        conv.avxData = multiplied;
        return {conv._x, conv._y, conv._z, conv._w};
    }

    // divides each element individually
    inline Vec4 operator/(const Vec4& left, const Vec4& right) {
        __m256d dived = _mm256_div_pd(left.avxData, right.avxData);
        AVXConvert conv {};
        conv.avxData = dived;
        return {conv._x, conv._y, conv._z, conv._w};
    }

    #else
    // change this if you want
    typedef double PRECISION_TYPE;

    class Vec4 {
        private:
            union xType {
                PRECISION_TYPE x;
                PRECISION_TYPE r;
            };
            union yType {
                PRECISION_TYPE y;
                PRECISION_TYPE g;
            };
            union zType {
                PRECISION_TYPE z;
                PRECISION_TYPE b;
            };
            union wType {
                PRECISION_TYPE w;
                PRECISION_TYPE a;
            };

            struct valueType {
                xType v1;
                yType v2;
                zType v3;
                wType v4;
            };
            // isn't much of a reason to do it this way
            // it's unlikely that we'll need to use the w component
            // but it helps better line up with the GPU and other SIMD type instructions, like what's above.
            valueType value;
        public:
            Vec4(): value{0, 0, 0, 0} {}
            Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z): value{x, y, z, 0} {}
            Vec4(PRECISION_TYPE x, PRECISION_TYPE y, PRECISION_TYPE z, PRECISION_TYPE w): value{x, y, z, w} {}
            Vec4(const Vec4& vec): value{vec.x(), vec.y(), vec.z(), vec.w()} {}


            // most of the modern c++ here is because clang tidy was annoying me
            [[nodiscard]] inline PRECISION_TYPE x() const { return value.v1.x; }

            [[nodiscard]] inline PRECISION_TYPE y() const { return value.v2.y; }

            [[nodiscard]] inline PRECISION_TYPE z() const { return value.v3.z; }

            [[nodiscard]] inline PRECISION_TYPE w() const { return value.v4.w; }

            [[nodiscard]] inline PRECISION_TYPE r() const { return value.v1.r; }

            [[nodiscard]] inline PRECISION_TYPE g() const { return value.v2.g; }

            [[nodiscard]] inline PRECISION_TYPE b() const { return value.v3.b; }

            [[nodiscard]] inline PRECISION_TYPE a() const { return value.v4.a; }

            // negation operator
            Vec4 operator-() const { return {-x(), -y(), -z(), -w()}; }

            [[nodiscard]] inline PRECISION_TYPE magnitude() const {
                return sqrt(lengthSquared());
            }

            [[nodiscard]] inline PRECISION_TYPE lengthSquared() const {
                return x() * x() + y() * y() + z() * z() + w() * w();
            }

            // returns the unit-vector.
            [[nodiscard]] inline Vec4 normalize() const {
                PRECISION_TYPE mag = magnitude();
                return {x() / mag, y() / mag, z() / mag, w() / mag};
            }

            // add operator before the vec returns the magnitude
            PRECISION_TYPE operator+() const {
                return magnitude();
            }

            // preforms the dot product of left * right
            static inline PRECISION_TYPE dot(const Vec4& left, const Vec4& right) {
                return left.x() * right.x()
                    + left.y() * right.y()
                    + left.z() * right.z();
            }

            // preforms the cross product of left X right
            // since a general solution to the cross product doesn't exist in 4d
            // we are going to ignore the w.
            static inline Vec4 cross(const Vec4& left, const Vec4& right) {
                return {left.y() * right.z() - left.z() * right.y(),
                        left.z() * right.x() - left.x() * right.z(),
                        left.x() * right.y() - left.y() * right.x()};
            }

    };

// Utility Functions

    // adds the two vectors left and right
    inline Vec4 operator+(const Vec4& left, const Vec4& right) {
        return {left.x() + right.x(), left.y() + right.y(), left.z() + right.z(), left.w() + right.w()};
    }

    // subtracts the right vector from the left.
    inline Vec4 operator-(const Vec4& left, const Vec4& right) {
        return {left.x() - right.x(), left.y() - right.y(), left.z() - right.z(), left.w() - right.w()};
    }

    // multiples the left with the right
    inline Vec4 operator*(const Vec4& left, const Vec4& right) {
        return {left.x() * right.x(), left.y() * right.y(), left.z() * right.z(), left.w() * right.w()};
    }

    // divides each element individually
    inline Vec4 operator/(const Vec4& left, const Vec4& right) {
        return {left.x() / right.x(), left.y() / right.y(), left.z() / right.z(), left.w() / right.w()};
    }

    #endif

    // none of these can be vectorized with AVX instructions

    // useful for printing out the vector to stdout
    inline std::ostream& operator<<(std::ostream& out, const Vec4& v) {
        return out << "Vec4{" << v.x() << ", " << v.y() << ", " << v.z() << ", " << v.w() << "} ";
    }

    // multiplies the const c with each element in the vector v
    inline Vec4 operator*(const PRECISION_TYPE c, const Vec4& v) {
        return {c * v.x(), c * v.y(), c * v.z(), c * v.w()};
    }

    // same as above but for right sided constants
    inline Vec4 operator*(const Vec4& v, PRECISION_TYPE c) {
        return c * v;
    }

    // divides the vector by the constant c
    inline Vec4 operator/(const Vec4& v, PRECISION_TYPE c) {
        return {v.x() / c, v.y() / c, v.z() / c, v.w() / c};
    }

    // divides each element in the vector by over the constant
    inline Vec4 operator/(PRECISION_TYPE c, const Vec4& v) {
        return {c / v.x(), c / v.y(), c / v.z(), c / v.w()};
    }

    class Ray {
        private:
            // the starting point for our ray
            Vec4 start;
            // and the direction it is currently traveling
            Vec4 direction;
            Vec4 inverseDirection;
        public:
            Ray(const Vec4& start, const Vec4& direction): start(start), direction(direction), inverseDirection(1 / direction) {}

            [[nodiscard]] Vec4 getStartingPoint() const { return start; }

            [[nodiscard]] Vec4 getDirection() const { return direction; }

            // not always needed, but it's good to not have to calculate the inverse inside the intersection
            // as that would be very every AABB, and that is expensive
            [[nodiscard]] Vec4 getInverseDirection() const { return inverseDirection; }

            // returns a point along the ray, extended away from start by the length.
            [[nodiscard]] inline Vec4 along(PRECISION_TYPE length) const { return start + length * direction; }

    };

    inline std::ostream& operator<<(std::ostream& out, const Ray& v) {
        return out << "Ray{" << v.getStartingPoint() << " " << v.getDirection() << "} ";
    }

}

#endif //STEP_2_VECTORS_H