From bb695ab703457735947bede9eaab9e4e7165fb6d Mon Sep 17 00:00:00 2001
From: Brett <brettmaster1@gmail.com>
Date: Wed, 6 Mar 2024 20:35:54 -0500
Subject: [PATCH 1/4] page-aligned allocators; if you are not using huge pages,
 please disable, it'll provide a slight performance improvement

---
 include/blt/std/allocator.h | 188 ++++++++++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 39 deletions(-)

diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h
index dd9071c..f3255ae 100644
--- a/include/blt/std/allocator.h
+++ b/include/blt/std/allocator.h
@@ -27,9 +27,11 @@
 #include <stdexcept>
 #include "logging.h"
 #include <cstdlib>
-
+    
     #ifdef __unix__
+        
         #include <sys/mman.h>
+    
     #endif
 
 namespace blt
@@ -537,18 +539,77 @@ namespace blt
             }
     };
     
-    template<blt::size_t BLOCK_SIZE = 4096 * 16>
+    template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512>
     class bump_allocator2
     {
             // power of two
-            static_assert(BLOCK_SIZE && ((BLOCK_SIZE & (BLOCK_SIZE - 1)) == 0));
+            static_assert(((BLOCK_SIZE & (BLOCK_SIZE - 1)) == 0) && "Must be a power of two!");
         private:
+            template<typename LOG_FUNC>
+            static void handle_mmap_error(LOG_FUNC func = BLT_ERROR_STREAM)
+            {
+#define BLT_WRITE(arg) func << arg << '\n';
+                switch (errno)
+                {
+                    case EACCES:
+                        BLT_WRITE("fd not set to open!");
+                        break;
+                    case EAGAIN:
+                        BLT_WRITE("The file has been locked, or too much memory has been locked");
+                        break;
+                    case EBADF:
+                        BLT_WRITE("fd is not a valid file descriptor");
+                        break;
+                    case EEXIST:
+                        BLT_WRITE("MAP_FIXED_NOREPLACE was specified in flags, and the range covered "
+                                  "by addr and length clashes with an existing mapping.");
+                        break;
+                    case EINVAL:
+                        BLT_WRITE("We don't like addr, length, or offset (e.g., they are too large, or not aligned on a page boundary).");
+                        BLT_WRITE("Or length was 0");
+                        BLT_WRITE("Or flags contained none of MAP_PRIVATE, MAP_SHARED, or MAP_SHARED_VALIDATE.");
+                        break;
+                    case ENFILE:
+                        BLT_WRITE("The system-wide limit on the total number of open files has been reached.");
+                        break;
+                    case ENODEV:
+                        BLT_WRITE("The underlying filesystem of the specified file does not support memory mapping.");
+                        break;
+                    case ENOMEM:
+                        BLT_WRITE("No memory is available.");
+                        BLT_WRITE("Or The process's maximum number of mappings would have been exceeded.  "
+                                  "This error can also occur for munmap(), when unmapping a region in the middle of an existing mapping, "
+                                  "since this results in two smaller mappings on either side of the region being unmapped.");
+                        BLT_WRITE("Or The process's RLIMIT_DATA limit, described in getrlimit(2), would have been exceeded.");
+                        BLT_WRITE("Or We don't like addr, because it exceeds the virtual address space of the CPU.");
+                        break;
+                    case EOVERFLOW:
+                        BLT_WRITE("On 32-bit architecture together with the large file extension (i.e., using 64-bit off_t): "
+                                  "the number of pages used for length plus number of "
+                                  "pages used for offset would overflow unsigned long (32 bits).");
+                        break;
+                    case EPERM:
+                        BLT_WRITE("The prot argument asks for PROT_EXEC but the mapped area "
+                                  "belongs to a file on a filesystem that was mounted no-exec.");
+                        BLT_WRITE("Or The operation was prevented by a file seal");
+                        BLT_WRITE("Or The MAP_HUGETLB flag was specified, but the caller "
+                                  "was not privileged (did not have the CAP_IPC_LOCK capability) "
+                                  "and is not a member of the sysctl_hugetlb_shm_group group; "
+                                  "see the description of /proc/sys/vm/sysctl_hugetlb_shm_group");
+                        break;
+                    case ETXTBSY:
+                        BLT_WRITE("MAP_DENYWRITE was set but the object specified by fd is open for writing.");
+                        break;
+                }
+            }
+            
             struct block
             {
                 struct
                 {
                     blt::size_t allocated_objects = 0;
                     block* next = nullptr;
+                    block* prev = nullptr;
                     blt::u8* offset = nullptr;
                 } metadata;
                 blt::u8 buffer[BLOCK_SIZE - sizeof(metadata)]{};
@@ -562,16 +623,46 @@ namespace blt
             block* base = nullptr;
             block* head = nullptr;
             
+            std::vector<block*> allocated_blocks;
+            
             block* allocate_block()
             {
-                auto* buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE));
+                block* buffer;
+#ifdef __unix__
+                if constexpr (USE_HUGE)
+                {
+                    static_assert((BLOCK_SIZE & (HUGE_PAGE_SIZE - 1)) == 0 && "Must be multiple of the huge page size!");
+                    buffer = static_cast<block*>(mmap(nullptr, BLOCK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0));
+                    // if we fail to allocate a huge page we can try to allocate normally
+                    if (buffer == MAP_FAILED)
+                    {
+                        BLT_WARN_STREAM << "We failed to allocate huge pages\n";
+                        handle_mmap_error(BLT_WARN_STREAM);
+                        buffer = static_cast<block*>(mmap(nullptr, BLOCK_SIZE * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+                        if (buffer == MAP_FAILED)
+                        {
+                            BLT_ERROR_STREAM << "Failed to allocate normal pages\n";
+                            handle_mmap_error(BLT_ERROR_STREAM);
+                            throw std::bad_alloc();
+                        }
+                        blt::size_t bytes = BLOCK_SIZE * 2;
+                        auto* ptr = static_cast<void*>(buffer);
+                        buffer = static_cast<block*>(std::align(BLOCK_SIZE, BLOCK_SIZE, ptr, bytes));
+                    }
+                } else
+                    buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE));
+#else
+                buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE));
+#endif
                 construct(buffer);
+                allocated_blocks.push_back(buffer);
                 return buffer;
             }
             
             void allocate_forward()
             {
                 auto* block = allocate_block();
+                block->metadata.prev = head;
                 head->metadata.next = block;
                 head = block;
             }
@@ -579,19 +670,28 @@ namespace blt
             template<typename T>
             T* allocate_back()
             {
-                size_t remaining_bytes = BLOCK_SIZE - static_cast<size_t>(head->metadata.offset - head->buffer);
-                
-//                auto& back = blocks.back();
-//                size_t remaining_bytes = size_ - static_cast<size_t>(back.offset - back.buffer);
-//                auto pointer = static_cast<void*>(back.offset);
-//                const auto aligned_address = std::align(alignof(T), sizeof(T), pointer, remaining_bytes);
-//                if (aligned_address != nullptr)
-//                {
-//                    back.offset = static_cast<blt::u8*>(aligned_address) + sizeof(T);
-//                    back.allocated_objects++;
-//                }
-//
-//                return static_cast<T*>(aligned_address);
+                blt::size_t remaining_bytes = sizeof(head->buffer) - static_cast<blt::size_t>(head->metadata.offset - head->buffer); // bound by the real buffer size: buffer is BLOCK_SIZE minus the metadata header, so using BLOCK_SIZE here would let std::align run past the block
+                auto pointer = static_cast<void*>(head->metadata.offset); // first unused byte of the head block
+                const auto aligned_address = std::align(alignof(T), sizeof(T), pointer, remaining_bytes); // nullptr when an aligned T does not fit in the remaining space
+                if (aligned_address != nullptr)
+                {
+                    head->metadata.allocated_objects++; // live-object count, decremented again by deallocate()
+                    head->metadata.offset = static_cast<blt::u8*>(aligned_address) + sizeof(T); // bump past the object just handed out
+                }
+                return static_cast<T*>(aligned_address); // nullptr tells the caller the head block is full
+            }
+            
+            inline void del(block* p)
+            {
+                if constexpr (USE_HUGE)
+                {
+                    if (munmap(p, BLOCK_SIZE))
+                    {
+                        BLT_ERROR_STREAM << "FAILED TO DEALLOCATE BLOCK\n";
+                        handle_mmap_error(BLT_ERROR_STREAM);
+                    }
+                }else
+                    free(p);
             }
         
         public:
@@ -600,16 +700,45 @@ namespace blt
                 base = head = allocate_block();
             };
             
+            explicit bump_allocator2(blt::size_t): bump_allocator2()
+            {}
+            
             template<typename T>
             [[nodiscard]] T* allocate()
             {
-            
+                if constexpr (sizeof(T) > sizeof(block::buffer)) // a block only offers its buffer, not all BLOCK_SIZE bytes (the metadata header takes the rest)
+                    throw std::bad_alloc();
+                
+                if (T* ptr = allocate_back<T>(); ptr == nullptr) // try to bump-allocate from the current head block
+                    allocate_forward(); // head is full: chain a fresh block and retry once below
+                else
+                    return ptr;
+                
+                if (T* ptr = allocate_back<T>(); ptr == nullptr) // second attempt, now against the fresh block
+                    throw std::bad_alloc(); // does not fit even in a fresh block
+                else
+                    return ptr;
             }
             
             template<typename T>
             void deallocate(T* p)
             {
-            
+                if (p == nullptr)
+                    return;
+                //BLT_DEBUG(p);
+                auto* blk = reinterpret_cast<block*>(reinterpret_cast<std::uintptr_t>(p) & static_cast<std::uintptr_t>(~(BLOCK_SIZE - 1)));
+                //BLT_TRACE(blk);
+                //for (const auto& v : allocated_blocks)
+                //    BLT_INFO(" %p", v);
+                if (--blk->metadata.allocated_objects == 0)
+                {
+                    if (blk == base)
+                        base = allocate_block();
+                    if (blk->metadata.prev != nullptr)
+                        blk->metadata.prev->metadata.next = blk->metadata.next;
+                    
+                    del(blk);
+                }
             }
             
             template<typename T, typename... Args>
@@ -638,30 +767,11 @@ namespace blt
                 while (next != nullptr)
                 {
                     auto* after = next->metadata.next;
-                    free(next);
+                    del(next);
                     next = after;
                 }
             }
     };
-    
-    template<typename T>
-    class constexpr_allocator
-    {
-        public:
-            constexpr constexpr_allocator() = default;
-            
-            constexpr T* allocate(blt::size_t n)
-            {
-                return ::new T[n];
-            }
-            
-            constexpr void deallocate(T* t, blt::size_t)
-            {
-                ::delete[] t;
-            }
-            
-            BLT_CPP20_CONSTEXPR ~constexpr_allocator() = default;
-    };
 }
 
 #define BLT_ALLOCATOR_H

From 06892a3418b8340701a57393e725dc5379305897 Mon Sep 17 00:00:00 2001
From: Brett <brettmaster1@gmail.com>
Date: Wed, 6 Mar 2024 21:48:17 -0500
Subject: [PATCH 2/4] more alignment in the allocator

---
 include/blt/std/allocator.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h
index f3255ae..3a879e0 100644
--- a/include/blt/std/allocator.h
+++ b/include/blt/std/allocator.h
@@ -623,8 +623,6 @@ namespace blt
             block* base = nullptr;
             block* head = nullptr;
             
-            std::vector<block*> allocated_blocks;
-            
             block* allocate_block()
             {
                 block* buffer;
@@ -638,16 +636,22 @@ namespace blt
                     {
                         BLT_WARN_STREAM << "We failed to allocate huge pages\n";
                         handle_mmap_error(BLT_WARN_STREAM);
-                        buffer = static_cast<block*>(mmap(nullptr, BLOCK_SIZE * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+                        BLT_WARN_STREAM << "\033[1;31mYou should attempt to enable "
+                                           "huge pages as this will allocate normal pages and double the memory usage!\033[22m\n";
+                        blt::size_t bytes = BLOCK_SIZE * 2;
+                        buffer = static_cast<block*>(mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
                         if (buffer == MAP_FAILED)
                         {
                             BLT_ERROR_STREAM << "Failed to allocate normal pages\n";
                             handle_mmap_error(BLT_ERROR_STREAM);
                             throw std::bad_alloc();
                         }
-                        blt::size_t bytes = BLOCK_SIZE * 2;
+                        if (((size_t)buffer & (HUGE_PAGE_SIZE - 1)) != 0)
+                            BLT_ERROR("Pointer is not aligned! %p", buffer);
                         auto* ptr = static_cast<void*>(buffer);
+                        auto ptr_size = reinterpret_cast<blt::size_t>(ptr);
                         buffer = static_cast<block*>(std::align(BLOCK_SIZE, BLOCK_SIZE, ptr, bytes));
+                        BLT_ERROR("Offset by %ld pages, resulting: %p", (reinterpret_cast<blt::size_t>(buffer) - ptr_size) / 4096, buffer);
                     }
                 } else
                     buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE));
@@ -655,7 +659,6 @@ namespace blt
                 buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE));
 #endif
                 construct(buffer);
-                allocated_blocks.push_back(buffer);
                 return buffer;
             }
             
@@ -725,11 +728,7 @@ namespace blt
             {
                 if (p == nullptr)
                     return;
-                //BLT_DEBUG(p);
                 auto* blk = reinterpret_cast<block*>(reinterpret_cast<std::uintptr_t>(p) & static_cast<std::uintptr_t>(~(BLOCK_SIZE - 1)));
-                //BLT_TRACE(blk);
-                //for (const auto& v : allocated_blocks)
-                //    BLT_INFO(" %p", v);
                 if (--blk->metadata.allocated_objects == 0)
                 {
                     if (blk == base)

From bfb7b04ce5408053ce26fa9fa916809aa2132726 Mon Sep 17 00:00:00 2001
From: Brett <brettmaster1@gmail.com>
Date: Wed, 6 Mar 2024 23:52:43 -0500
Subject: [PATCH 3/4] no more annoying warnings, just enable THP

---
 include/blt/std/allocator.h | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h
index 3a879e0..3cb9c4e 100644
--- a/include/blt/std/allocator.h
+++ b/include/blt/std/allocator.h
@@ -539,7 +539,7 @@ namespace blt
             }
     };
     
-    template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512>
+    template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512, bool WARN_ON_FAIL = false>
     class bump_allocator2
     {
             // power of two
@@ -634,10 +634,13 @@ namespace blt
                     // if we fail to allocate a huge page we can try to allocate normally
                     if (buffer == MAP_FAILED)
                     {
-                        BLT_WARN_STREAM << "We failed to allocate huge pages\n";
-                        handle_mmap_error(BLT_WARN_STREAM);
-                        BLT_WARN_STREAM << "\033[1;31mYou should attempt to enable "
-                                           "huge pages as this will allocate normal pages and double the memory usage!\033[22m\n";
+                        if constexpr (WARN_ON_FAIL)
+                        {
+                            BLT_WARN_STREAM << "We failed to allocate huge pages\n";
+                            handle_mmap_error(BLT_WARN_STREAM);
+                            BLT_WARN_STREAM << "\033[1;31mYou should attempt to enable "
+                                               "huge pages as this will allocate normal pages and double the memory usage!\033[22m\n";
+                        }
                         blt::size_t bytes = BLOCK_SIZE * 2;
                         buffer = static_cast<block*>(mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
                         if (buffer == MAP_FAILED)
@@ -646,12 +649,16 @@ namespace blt
                             handle_mmap_error(BLT_ERROR_STREAM);
                             throw std::bad_alloc();
                         }
-                        if (((size_t)buffer & (HUGE_PAGE_SIZE - 1)) != 0)
-                            BLT_ERROR("Pointer is not aligned! %p", buffer);
+                        if constexpr (WARN_ON_FAIL)
+                        {
+                            if (((size_t) buffer & (HUGE_PAGE_SIZE - 1)) != 0)
+                                BLT_ERROR("Pointer is not aligned! %p", buffer);
+                        }
                         auto* ptr = static_cast<void*>(buffer);
                         auto ptr_size = reinterpret_cast<blt::size_t>(ptr);
                         buffer = static_cast<block*>(std::align(BLOCK_SIZE, BLOCK_SIZE, ptr, bytes));
-                        BLT_ERROR("Offset by %ld pages, resulting: %p", (reinterpret_cast<blt::size_t>(buffer) - ptr_size) / 4096, buffer);
+                        if constexpr (WARN_ON_FAIL)
+                            BLT_ERROR("Offset by %ld pages, resulting: %p", (reinterpret_cast<blt::size_t>(buffer) - ptr_size) / 4096, buffer);
                     }
                 } else
                     buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE));
@@ -693,7 +700,7 @@ namespace blt
                         BLT_ERROR_STREAM << "FAILED TO DEALLOCATE BLOCK\n";
                         handle_mmap_error(BLT_ERROR_STREAM);
                     }
-                }else
+                } else
                     free(p);
             }
         

From 0b1e566217f969b4badf5a65646893bfac90fdcc Mon Sep 17 00:00:00 2001
From: Brett <brettmaster1@gmail.com>
Date: Wed, 6 Mar 2024 23:53:03 -0500
Subject: [PATCH 4/4] no default huge

---
 include/blt/std/allocator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h
index 3cb9c4e..6b97757 100644
--- a/include/blt/std/allocator.h
+++ b/include/blt/std/allocator.h
@@ -539,7 +539,7 @@ namespace blt
             }
     };
     
-    template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512, bool WARN_ON_FAIL = false>
+    template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = false, blt::size_t HUGE_PAGE_SIZE = 4096 * 512, bool WARN_ON_FAIL = false>
     class bump_allocator2
     {
             // power of two