From bb695ab703457735947bede9eaab9e4e7165fb6d Mon Sep 17 00:00:00 2001 From: Brett <brettmaster1@gmail.com> Date: Wed, 6 Mar 2024 20:35:54 -0500 Subject: [PATCH 1/4] page aligned allocators, if you are not using huge pages please disable, it'll provide slight performance improvement --- include/blt/std/allocator.h | 188 ++++++++++++++++++++++++++++-------- 1 file changed, 149 insertions(+), 39 deletions(-) diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h index dd9071c..f3255ae 100644 --- a/include/blt/std/allocator.h +++ b/include/blt/std/allocator.h @@ -27,9 +27,11 @@ #include <stdexcept> #include "logging.h" #include <cstdlib> - + #ifdef __unix__ + #include <sys/mman.h> + #endif namespace blt @@ -537,18 +539,77 @@ namespace blt } }; - template<blt::size_t BLOCK_SIZE = 4096 * 16> + template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512> class bump_allocator2 { // power of two - static_assert(BLOCK_SIZE && ((BLOCK_SIZE & (BLOCK_SIZE - 1)) == 0)); + static_assert(((BLOCK_SIZE & (BLOCK_SIZE - 1)) == 0) && "Must be a power of two!"); private: + template<typename LOG_FUNC> + static void handle_mmap_error(LOG_FUNC func = BLT_ERROR_STREAM) + { +#define BLT_WRITE(arg) func << arg << '\n'; + switch (errno) + { + case EACCES: + BLT_WRITE("fd not set to open!"); + break; + case EAGAIN: + BLT_WRITE("The file has been locked, or too much memory has been locked"); + break; + case EBADF: + BLT_WRITE("fd is not a valid file descriptor"); + break; + case EEXIST: + BLT_WRITE("MAP_FIXED_NOREPLACE was specified in flags, and the range covered " + "by addr and length clashes with an existing mapping."); + break; + case EINVAL: + BLT_WRITE("We don't like addr, length, or offset (e.g., they are too large, or not aligned on a page boundary)."); + BLT_WRITE("Or length was 0"); + BLT_WRITE("Or flags contained none of MAP_PRIVATE, MAP_SHARED, or MAP_SHARED_VALIDATE."); + break; + case ENFILE: + BLT_WRITE("The system-wide limit on the total number of open files has been reached."); + break; + case ENODEV: + BLT_WRITE("The underlying filesystem of the specified file does not support memory mapping."); + break; + case ENOMEM: + BLT_WRITE("No memory is available."); + BLT_WRITE("Or The process's maximum number of mappings would have been exceeded. " + "This error can also occur for munmap(), when unmapping a region in the middle of an existing mapping, " + "since this results in two smaller mappings on either side of the region being unmapped."); + BLT_WRITE("Or The process's RLIMIT_DATA limit, described in getrlimit(2), would have been exceeded."); + BLT_WRITE("Or We don't like addr, because it exceeds the virtual address space of the CPU."); + break; + case EOVERFLOW: + BLT_WRITE("On 32-bit architecture together with the large file extension (i.e., using 64-bit off_t): " + "the number of pages used for length plus number of " + "pages used for offset would overflow unsigned long (32 bits)."); + break; + case EPERM: + BLT_WRITE("The prot argument asks for PROT_EXEC but the mapped area " + "belongs to a file on a filesystem that was mounted no-exec."); + BLT_WRITE("Or The operation was prevented by a file seal"); + BLT_WRITE("Or The MAP_HUGETLB flag was specified, but the caller " + "was not privileged (did not have the CAP_IPC_LOCK capability) " + "and is not a member of the sysctl_hugetlb_shm_group group; " + "see the description of /proc/sys/vm/sysctl_hugetlb_shm_group"); + break; + case ETXTBSY: + BLT_WRITE("MAP_DENYWRITE was set but the object specified by fd is open for writing."); + break; + } + } + struct block { struct { blt::size_t allocated_objects = 0; block* next = nullptr; + block* prev = nullptr; blt::u8* offset = nullptr; } metadata; blt::u8 buffer[BLOCK_SIZE - sizeof(metadata)]{}; @@ -562,16 +623,46 @@ namespace blt block* base = nullptr; block* head = nullptr; + std::vector<block*> allocated_blocks; + block* allocate_block() { - auto* buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); + block* buffer; +#ifdef __unix__ + if constexpr (USE_HUGE) + { + static_assert((BLOCK_SIZE & (HUGE_PAGE_SIZE - 1)) == 0 && "Must be multiple of the huge page size!"); + buffer = static_cast<block*>(mmap(nullptr, BLOCK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0)); + // if we fail to allocate a huge page we can try to allocate normally + if (buffer == MAP_FAILED) + { + BLT_WARN_STREAM << "We failed to allocate huge pages\n"; + handle_mmap_error(BLT_WARN_STREAM); + buffer = static_cast<block*>(mmap(nullptr, BLOCK_SIZE * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + if (buffer == MAP_FAILED) + { + BLT_ERROR_STREAM << "Failed to allocate normal pages\n"; + handle_mmap_error(BLT_ERROR_STREAM); + throw std::bad_alloc(); + } + blt::size_t bytes = BLOCK_SIZE * 2; + auto* ptr = static_cast<void*>(buffer); + buffer = static_cast<block*>(std::align(BLOCK_SIZE, BLOCK_SIZE, ptr, bytes)); + } + } else + buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); +#else + buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); +#endif construct(buffer); + allocated_blocks.push_back(buffer); return buffer; } void allocate_forward() { auto* block = allocate_block(); + block->metadata.prev = head; head->metadata.next = block; head = block; } @@ -579,19 +670,28 @@ namespace blt template<typename T> T* allocate_back() { - size_t remaining_bytes = BLOCK_SIZE - static_cast<size_t>(head->metadata.offset - head->buffer); - -// auto& back = blocks.back(); -// size_t remaining_bytes = size_ - static_cast<size_t>(back.offset - back.buffer); -// auto pointer = static_cast<void*>(back.offset); -// const auto aligned_address = std::align(alignof(T), sizeof(T), pointer, remaining_bytes); -// if (aligned_address != nullptr) -// { -// back.offset = static_cast<blt::u8*>(aligned_address) + sizeof(T); -// back.allocated_objects++; -// } -// -// return static_cast<T*>(aligned_address); + blt::size_t remaining_bytes = BLOCK_SIZE - static_cast<blt::size_t>(head->metadata.offset - head->buffer); + auto pointer = static_cast<void*>(head->metadata.offset); + const auto aligned_address = std::align(alignof(T), sizeof(T), pointer, remaining_bytes); + if (aligned_address != nullptr) + { + head->metadata.allocated_objects++; + head->metadata.offset = static_cast<blt::u8*>(aligned_address) + sizeof(T); + } + return static_cast<T*>(aligned_address); + } + + inline void del(block* p) + { + if constexpr (USE_HUGE) + { + if (munmap(p, BLOCK_SIZE)) + { + BLT_ERROR_STREAM << "FAILED TO DEALLOCATE BLOCK\n"; + handle_mmap_error(BLT_ERROR_STREAM); + } + }else + free(p); } public: @@ -600,16 +700,45 @@ namespace blt base = head = allocate_block(); }; + explicit bump_allocator2(blt::size_t): bump_allocator2() + {} + template<typename T> [[nodiscard]] T* allocate() { - + if constexpr (sizeof(T) > BLOCK_SIZE) + throw std::bad_alloc(); + + if (T* ptr = allocate_back<T>(); ptr == nullptr) + allocate_forward(); + else + return ptr; + + if (T* ptr = allocate_back<T>(); ptr == nullptr) + throw std::bad_alloc(); + else + return ptr; } template<typename T> void deallocate(T* p) { - + if (p == nullptr) + return; + //BLT_DEBUG(p); + auto* blk = reinterpret_cast<block*>(reinterpret_cast<std::uintptr_t>(p) & static_cast<std::uintptr_t>(~(BLOCK_SIZE - 1))); + //BLT_TRACE(blk); + //for (const auto& v : allocated_blocks) + // BLT_INFO(" %p", v); + if (--blk->metadata.allocated_objects == 0) + { + if (blk == base) + base = allocate_block(); + if (blk->metadata.prev != nullptr) + blk->metadata.prev->metadata.next = blk->metadata.next; + + del(blk); + } } template<typename T, typename... Args> @@ -638,30 +767,11 @@ namespace blt while (next != nullptr) { auto* after = next->metadata.next; - free(next); + del(next); next = after; } } }; - - template<typename T> - class constexpr_allocator - { - public: - constexpr constexpr_allocator() = default; - - constexpr T* allocate(blt::size_t n) - { - return ::new T[n]; - } - - constexpr void deallocate(T* t, blt::size_t) - { - ::delete[] t; - } - - BLT_CPP20_CONSTEXPR ~constexpr_allocator() = default; - }; } #define BLT_ALLOCATOR_H From 06892a3418b8340701a57393e725dc5379305897 Mon Sep 17 00:00:00 2001 From: Brett <brettmaster1@gmail.com> Date: Wed, 6 Mar 2024 21:48:17 -0500 Subject: [PATCH 2/4] more alignment in the allocator --- include/blt/std/allocator.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h index f3255ae..3a879e0 100644 --- a/include/blt/std/allocator.h +++ b/include/blt/std/allocator.h @@ -623,8 +623,6 @@ namespace blt block* base = nullptr; block* head = nullptr; - std::vector<block*> allocated_blocks; - block* allocate_block() { block* buffer; @@ -638,16 +636,22 @@ namespace blt { BLT_WARN_STREAM << "We failed to allocate huge pages\n"; handle_mmap_error(BLT_WARN_STREAM); - buffer = static_cast<block*>(mmap(nullptr, BLOCK_SIZE * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + BLT_WARN_STREAM << "\033[1;31mYou should attempt to enable " + "huge pages as this will allocate normal pages and double the memory usage!\033[22m\n"; + blt::size_t bytes = BLOCK_SIZE * 2; + buffer = static_cast<block*>(mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); if (buffer == MAP_FAILED) { BLT_ERROR_STREAM << "Failed to allocate normal pages\n"; handle_mmap_error(BLT_ERROR_STREAM); throw std::bad_alloc(); } - blt::size_t bytes = BLOCK_SIZE * 2; + if (((size_t)buffer & (HUGE_PAGE_SIZE - 1)) != 0) + BLT_ERROR("Pointer is not aligned! %p", buffer); auto* ptr = static_cast<void*>(buffer); + auto ptr_size = reinterpret_cast<blt::size_t>(ptr); buffer = static_cast<block*>(std::align(BLOCK_SIZE, BLOCK_SIZE, ptr, bytes)); + BLT_ERROR("Offset by %ld pages, resulting: %p", (reinterpret_cast<blt::size_t>(buffer) - ptr_size) / 4096, buffer); } } else buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); @@ -655,7 +659,6 @@ namespace blt buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); #endif construct(buffer); - allocated_blocks.push_back(buffer); return buffer; } @@ -725,11 +728,7 @@ namespace blt { if (p == nullptr) return; - //BLT_DEBUG(p); auto* blk = reinterpret_cast<block*>(reinterpret_cast<std::uintptr_t>(p) & static_cast<std::uintptr_t>(~(BLOCK_SIZE - 1))); - //BLT_TRACE(blk); - //for (const auto& v : allocated_blocks) - // BLT_INFO(" %p", v); if (--blk->metadata.allocated_objects == 0) { if (blk == base) From bfb7b04ce5408053ce26fa9fa916809aa2132726 Mon Sep 17 00:00:00 2001 From: Brett <brettmaster1@gmail.com> Date: Wed, 6 Mar 2024 23:52:43 -0500 Subject: [PATCH 3/4] no more annoying warnings, just enable THB --- include/blt/std/allocator.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h index 3a879e0..3cb9c4e 100644 --- a/include/blt/std/allocator.h +++ b/include/blt/std/allocator.h @@ -539,7 +539,7 @@ namespace blt } }; - template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512> + template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512, bool WARN_ON_FAIL = false> class bump_allocator2 { // power of two @@ -634,10 +634,13 @@ namespace blt // if we fail to allocate a huge page we can try to allocate normally if (buffer == MAP_FAILED) { - BLT_WARN_STREAM << "We failed to allocate huge pages\n"; - handle_mmap_error(BLT_WARN_STREAM); - BLT_WARN_STREAM << "\033[1;31mYou should attempt to enable " - "huge pages as this will allocate normal pages and double the memory usage!\033[22m\n"; + if constexpr (WARN_ON_FAIL) + { + BLT_WARN_STREAM << "We failed to allocate huge pages\n"; + handle_mmap_error(BLT_WARN_STREAM); + BLT_WARN_STREAM << "\033[1;31mYou should attempt to enable " + "huge pages as this will allocate normal pages and double the memory usage!\033[22m\n"; + } blt::size_t bytes = BLOCK_SIZE * 2; buffer = static_cast<block*>(mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); if (buffer == MAP_FAILED) @@ -646,12 +649,16 @@ namespace blt handle_mmap_error(BLT_ERROR_STREAM); throw std::bad_alloc(); } - if (((size_t)buffer & (HUGE_PAGE_SIZE - 1)) != 0) - BLT_ERROR("Pointer is not aligned! %p", buffer); + if constexpr (WARN_ON_FAIL) + { + if (((size_t) buffer & (HUGE_PAGE_SIZE - 1)) != 0) + BLT_ERROR("Pointer is not aligned! %p", buffer); + } auto* ptr = static_cast<void*>(buffer); auto ptr_size = reinterpret_cast<blt::size_t>(ptr); buffer = static_cast<block*>(std::align(BLOCK_SIZE, BLOCK_SIZE, ptr, bytes)); - BLT_ERROR("Offset by %ld pages, resulting: %p", (reinterpret_cast<blt::size_t>(buffer) - ptr_size) / 4096, buffer); + if constexpr (WARN_ON_FAIL) + BLT_ERROR("Offset by %ld pages, resulting: %p", (reinterpret_cast<blt::size_t>(buffer) - ptr_size) / 4096, buffer); } } else buffer = reinterpret_cast<block*>(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); @@ -693,7 +700,7 @@ namespace blt BLT_ERROR_STREAM << "FAILED TO DEALLOCATE BLOCK\n"; handle_mmap_error(BLT_ERROR_STREAM); } - }else + } else free(p); } From 0b1e566217f969b4badf5a65646893bfac90fdcc Mon Sep 17 00:00:00 2001 From: Brett <brettmaster1@gmail.com> Date: Wed, 6 Mar 2024 23:53:03 -0500 Subject: [PATCH 4/4] no default huge --- include/blt/std/allocator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/blt/std/allocator.h b/include/blt/std/allocator.h index 3cb9c4e..6b97757 100644 --- a/include/blt/std/allocator.h +++ b/include/blt/std/allocator.h @@ -539,7 +539,7 @@ namespace blt } }; - template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = true, blt::size_t HUGE_PAGE_SIZE = 4096 * 512, bool WARN_ON_FAIL = false> + template<blt::size_t BLOCK_SIZE = 4096 * 512, bool USE_HUGE = false, blt::size_t HUGE_PAGE_SIZE = 4096 * 512, bool WARN_ON_FAIL = false> class bump_allocator2 { // power of two