/* * * Copyright (C) 2023 Brett Terpstra * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef BLT_ALLOCATOR_H #include #include #include #include #include #include #include #include #include "logging.h" #include #ifdef __unix__ #include #endif namespace blt { template class allocator_base { public: template inline void construct(U* p, Args&& ... args) { ::new((void*) p) U(std::forward(args)...); } template inline void destroy(U* p) { if (p != nullptr) p->~U(); } [[nodiscard]] inline size_t max_size() const { return std::numeric_limits::max(); } inline const_pointer address(const value_type& val) { return std::addressof(val); } inline pointer address(value_type& val) { return std::addressof(val); } }; template class area_allocator : public allocator_base { public: using value = T; using type = T; using value_type = type; using pointer = type*; using const_pointer = const type*; using void_pointer = void*; using const_void_pointer = const void*; using reference = value_type&; using const_reference = const value_type&; using size_type = size_t; using difference_type = size_t; using propagate_on_container_move_assignment = std::false_type; template struct rebind { typedef blt::area_allocator other; }; using allocator_base::allocator_base; private: /** * Stores a view to a region of memory that has been deallocated * This is a non-owning reference to the memory block * * pointer p is the pointer to the beginning of the block of memory * size_t n is the number of elements that this block can hold */ struct pointer_view { pointer p; size_t n; }; /** * Stores the actual data for allocated blocks. Since we would like to be able to allocate an arbitrary number of items * we need a way of storing that data. The block storage holds an owning pointer to a region of memory with used elements * Only up to used has to have their destructors called, which should be handled by the deallocate function * it is UB to not deallocate memory allocated by this allocator * * an internal vector is used to store the regions of memory which have been deallocated. the allocate function will search for * free blocks with sufficient size in order to maximize memory usage. In the future more advanced methods should be used * for both faster access to deallocated blocks of sufficient size and to ensure coherent memory. */ struct block_storage { pointer data; size_t used = 0; // TODO: b-tree? std::vector unallocated_blocks; }; /** * Stores an index to a pointer_view along with the amount of memory leftover after the allocation * it also stores the block being allocated to in question. The new inserted leftover should start at old_ptr + size */ struct block_view { block_storage* blk; size_t index; size_t leftover; block_view(block_storage* blk, size_t index, size_t leftover): blk(blk), index(index), leftover(leftover) {} }; /** * Allocate a new block of memory and push it to the back of blocks. */ inline void allocate_block() { //BLT_INFO("Allocating a new block of size %d", BLOCK_SIZE); auto* blk = new block_storage(); blk->data = static_cast(malloc(sizeof(T) * BLOCK_SIZE)); blocks.push_back(blk); } /** * Searches for a free block inside the block storage with sufficient space and returns an optional view to it * The optional will be empty if no open block can be found. */ inline std::optional search_for_block(block_storage* blk, size_t n) { for (auto kv : blt::enumerate(blk->unallocated_blocks)) { if (kv.second.n >= n) return block_view{blk, kv.first, kv.second.n - n}; } return {}; } /** * removes the block of memory from the unallocated_blocks storage in the underlying block, inserting a new unallocated block if * there was any leftover. Returns a pointer to the beginning of the new block. */ inline pointer swap_pop_resize_if(const block_view& view, size_t n) { pointer_view ptr = view.blk->unallocated_blocks[view.index]; std::iter_swap(view.blk->unallocated_blocks.begin() + view.index, view.blk->unallocated_blocks.end() - 1); view.blk->unallocated_blocks.pop_back(); if (view.leftover > 0) view.blk->unallocated_blocks.push_back({ptr.p + n, view.leftover}); return ptr.p; } /** * Finds the next available unallocated block of memory, or empty if there is none which meet size requirements */ inline std::optional find_available_block(size_t n) { for (auto* blk : blocks) { if (auto view = search_for_block(blk, n)) return swap_pop_resize_if(view.value(), n); } return {}; } /** * returns a pointer to a block of memory along with an offset into that pointer that the requested block can be found at */ inline std::pair getBlock(size_t n) { if (auto blk = find_available_block(n)) return {blk.value(), 0}; if (blocks.back()->used + n > BLOCK_SIZE) allocate_block(); auto ptr = std::pair{blocks.back()->data, blocks.back()->used}; blocks.back()->used += n; return ptr; } /** * Calls the constructor on elements if they require construction, otherwise constructor will not be called and this function is useless * * ALLOCATORS RETURN UNINIT STORAGE!! THIS HAS BEEN DISABLED. */ inline void allocate_in_block(pointer, size_t) { // if constexpr (std::is_default_constructible_v && !std::is_trivially_default_constructible_v) // { // for (size_t i = 0; i < n; i++) // new(&begin[i]) T(); // } } public: area_allocator() { allocate_block(); } area_allocator(const area_allocator& copy) = delete; area_allocator(area_allocator&& move) noexcept { blocks = move.blocks; } area_allocator& operator=(const area_allocator& copy) = delete; area_allocator& operator=(area_allocator&& move) noexcept { std::swap(move.blocks, blocks); } [[nodiscard]] pointer allocate(size_t n) { if (n > BLOCK_SIZE) throw std::runtime_error("Requested allocation is too large!"); auto block_info = getBlock(n); auto* ptr = &block_info.first[block_info.second]; // call constructors on the objects if they require it allocate_in_block(ptr, n); return ptr; } void deallocate(pointer p, size_t n) noexcept { if (p == nullptr) return; // for (size_t i = 0; i < n; i++) // p[i].~T(); for (auto*& blk : blocks) { if (p >= blk->data && p <= (blk->data + BLOCK_SIZE)) { blk->unallocated_blocks.push_back(pointer_view{p, n}); break; } } } ~area_allocator() { for (auto*& blk : blocks) { free(blk->data); delete blk; } } private: std::vector blocks; }; // template // class bump_allocator : public allocator_base // { // public: // using value = T; // using type = T; // using value_type = type; // using pointer = type*; // using const_pointer = const type*; // using void_pointer = void*; // using const_void_pointer = const void*; // using reference = value_type&; // using const_reference = const value_type&; // using size_type = size_t; // using difference_type = size_t; // using propagate_on_container_move_assignment = std::false_type; // template // struct rebind // { // typedef blt::bump_allocator other; // }; // using allocator_base::allocator_base; // private: // pointer buffer_; // blt::size_t offset_; // blt::size_t size_; // public: // explicit bump_allocator(blt::size_t size): buffer_(static_cast(malloc(size * sizeof(T)))), offset_(0), size_(size) // {} // // template // explicit bump_allocator(blt::size_t size, Args&& ... defaults): // buffer_(static_cast(malloc(size * sizeof(type)))), offset_(0), size_(size) // { // for (blt::size_t i = 0; i < size_; i++) // ::new(&buffer_[i]) T(std::forward(defaults)...); // } // // bump_allocator(pointer buffer, blt::size_t size): buffer_(buffer), offset_(0), size_(size) // {} // // bump_allocator(const bump_allocator& copy) = delete; // // bump_allocator(bump_allocator&& move) noexcept // { // buffer_ = move.buffer_; // size_ = move.size_; // offset_ = move.offset_; // } // // bump_allocator& operator=(const bump_allocator& copy) = delete; // // bump_allocator& operator=(bump_allocator&& move) noexcept // { // std::swap(move.buffer_, buffer_); // std::swap(move.size_, size_); // std::swap(move.offset_, offset_); // } // // pointer allocate(blt::size_t n) // { // auto nv = offset_ + n; // if (nv > size_) // throw std::bad_alloc(); // pointer b = &buffer_[offset_]; // offset_ = nv; // return b; // } // // void deallocate(pointer, blt::size_t) // {} // // ~bump_allocator() // { // free(buffer_); // } // }; /** * The bump allocator is meant to be a faster area allocator which will only allocate forward through either a supplied buffer or size * or will create a linked list type data structure of buffered blocks. * @tparam ALLOC allocator to use for any allocations. In the case of the non-linked variant, this will be used if a size is supplied. The supplied buffer must be allocated with this allocator! * @tparam linked use a linked list to allocate with the allocator or just use the supplied buffer and throw an exception of we cannot allocate */ template typename ALLOC = std::allocator> class bump_allocator_old; template typename ALLOC> class bump_allocator_old { private: ALLOC allocator; blt::u8* buffer_; blt::u8* offset_; blt::size_t size_; public: explicit bump_allocator_old(blt::size_t size): buffer_(static_cast(allocator.allocate(size))), offset_(buffer_), size_(size) {} explicit bump_allocator_old(blt::u8* buffer, blt::size_t size): buffer_(buffer), offset_(buffer), size_(size) {} template [[nodiscard]] T* allocate() { size_t remaining_num_bytes = size_ - static_cast(buffer_ - offset_); auto pointer = static_cast(offset_); const auto aligned_address = std::align(alignof(T), sizeof(T), pointer, remaining_num_bytes); if (aligned_address == nullptr) throw std::bad_alloc{}; offset_ = static_cast(aligned_address) + sizeof(T); return static_cast(aligned_address); } template [[nodiscard]] T* emplace(Args&& ... args) { const auto allocated_memory = allocate(); return new(allocated_memory) T{std::forward(args)...}; } template inline void construct(U* p, Args&& ... args) { ::new((void*) p) U(std::forward(args)...); } template inline void destroy(U* p) { if (p != nullptr) p->~U(); } ~bump_allocator_old() { allocator.deallocate(buffer_, size_); } }; template typename ALLOC> class bump_allocator_old { private: struct block { blt::size_t allocated_objects = 0; blt::u8* buffer = nullptr; blt::u8* offset = nullptr; explicit block(blt::u8* buffer): buffer(buffer), offset(buffer) {} }; ALLOC allocator; std::vector> blocks; blt::size_t size_; blt::size_t allocations = 0; blt::size_t deallocations = 0; void expand() { auto ptr = static_cast(allocator.allocate(size_)); blocks.push_back(block{ptr}); allocations++; } template T* allocate_back() { auto& back = blocks.back(); size_t remaining_bytes = size_ - static_cast(back.offset - back.buffer); auto pointer = static_cast(back.offset); const auto aligned_address = std::align(alignof(T), sizeof(T), pointer, remaining_bytes); if (aligned_address != nullptr) { back.offset = static_cast(aligned_address) + sizeof(T); back.allocated_objects++; } return static_cast(aligned_address); } public: /** * @param size of the list blocks */ explicit bump_allocator_old(blt::size_t size): size_(size) { expand(); } template [[nodiscard]] T* allocate() { if (auto ptr = allocate_back(); ptr == nullptr) expand(); else return ptr; if (auto ptr = allocate_back(); ptr == nullptr) throw std::bad_alloc(); else return ptr; } template void deallocate(T* p) { auto* ptr = reinterpret_cast(p); for (auto e : blt::enumerate(blocks)) { auto& block = e.second; if (ptr >= block.buffer && ptr <= block.offset) { block.allocated_objects--; if (block.allocated_objects == 0) { std::iter_swap(blocks.begin() + e.first, blocks.end() - 1); allocator.deallocate(blocks.back().buffer, size_); blocks.pop_back(); deallocations++; } return; } } } template [[nodiscard]] T* emplace(Args&& ... args) { const auto allocated_memory = allocate(); return new(allocated_memory) T{std::forward(args)...}; } template inline void construct(U* p, Args&& ... args) { ::new((void*) p) U(std::forward(args)...); } template inline void destroy(U* p) { if (p != nullptr) p->~U(); } ~bump_allocator_old() { if (allocations != deallocations) BLT_WARN("Allocator has blocks which have not been deallocated! Destructors might not have been called!"); for (auto& v : blocks) allocator.deallocate(v.buffer, size_); } }; // size of 2mb in bytes inline constexpr blt::size_t BLT_2MB_SIZE = 4096 * 512; /** * blt::bump_allocator. Allocates blocks of BLOCK_SIZE with zero reuse. When all objects from a block are fully deallocated the block will be freed * @tparam BLOCK_SIZE size of block to use. recommended to be multiple of page size or huge page size. * @tparam USE_HUGE allocate using mmap and huge pages. If this fails it will use mmap to allocate normally. defaults to off because linux has parent huge pages. * @tparam HUGE_PAGE_SIZE size the system allows huge pages to be. defaults to 2mb * @tparam WARN_ON_FAIL print warning messages if allocating huge pages fail */ template class bump_allocator { // ensure power of two static_assert(((BLOCK_SIZE & (BLOCK_SIZE - 1)) == 0) && "Must be a power of two!"); public: /** * convert any pointer back into a pointer its block */ template static inline auto to_block(T* p) { return reinterpret_cast(reinterpret_cast(p) & static_cast(~(BLOCK_SIZE - 1))); } class stats_t { friend bump_allocator; private: blt::size_t allocated_blocks = 0; blt::size_t allocated_bytes = 0; blt::size_t peak_blocks = 0; blt::size_t peak_bytes = 0; protected: inline void incrementBlocks() { allocated_blocks++; if (allocated_blocks > peak_blocks) peak_blocks = allocated_blocks; } inline void decrementBlocks() { allocated_blocks--; } inline void incrementBytes(blt::size_t bytes) { allocated_bytes += bytes; if (allocated_bytes > peak_bytes) peak_bytes = allocated_bytes; } inline void decrementBytes(blt::size_t bytes) { allocated_bytes -= bytes; } public: inline auto getAllocatedBlocks() const { return allocated_blocks; } inline auto getAllocatedBytes() const { return allocated_bytes; } inline auto getPeakBlocks() const { return peak_blocks; } inline auto getPeakBytes() const { return peak_bytes; } }; private: stats_t stats; /** * Logging function used for handling mmap errors. call after a failed mmap call. * @param LOG_FUNC function to log with, must be a BLT_*_STREAM */ template static void handle_mmap_error(LOG_FUNC func = BLT_ERROR_STREAM) { #define BLT_WRITE(arg) func << arg << '\n'; switch (errno) { case EACCES: BLT_WRITE("fd not set to open!"); break; case EAGAIN: BLT_WRITE("The file has been locked, or too much memory has been locked"); break; case EBADF: BLT_WRITE("fd is not a valid file descriptor"); break; case EEXIST: BLT_WRITE("MAP_FIXED_NOREPLACE was specified in flags, and the range covered " "by addr and length clashes with an existing mapping."); break; case EINVAL: BLT_WRITE("We don't like addr, length, or offset (e.g., they are too large, or not aligned on a page boundary)."); BLT_WRITE("Or length was 0"); BLT_WRITE("Or flags contained none of MAP_PRIVATE, MAP_SHARED, or MAP_SHARED_VALIDATE."); break; case ENFILE: BLT_WRITE("The system-wide limit on the total number of open files has been reached."); break; case ENODEV: BLT_WRITE("The underlying filesystem of the specified file does not support memory mapping."); break; case ENOMEM: BLT_WRITE("No memory is available."); BLT_WRITE("Or The process's maximum number of mappings would have been exceeded. " "This error can also occur for munmap(), when unmapping a region in the middle of an existing mapping, " "since this results in two smaller mappings on either side of the region being unmapped."); BLT_WRITE("Or The process's RLIMIT_DATA limit, described in getrlimit(2), would have been exceeded."); BLT_WRITE("Or We don't like addr, because it exceeds the virtual address space of the CPU."); break; case EOVERFLOW: BLT_WRITE("On 32-bit architecture together with the large file extension (i.e., using 64-bit off_t): " "the number of pages used for length plus number of " "pages used for offset would overflow unsigned long (32 bits)."); break; case EPERM: BLT_WRITE("The prot argument asks for PROT_EXEC but the mapped area " "belongs to a file on a filesystem that was mounted no-exec."); BLT_WRITE("Or The operation was prevented by a file seal"); BLT_WRITE("Or The MAP_HUGETLB flag was specified, but the caller " "was not privileged (did not have the CAP_IPC_LOCK capability) " "and is not a member of the sysctl_hugetlb_shm_group group; " "see the description of /proc/sys/vm/sysctl_hugetlb_shm_group"); break; case ETXTBSY: BLT_WRITE("MAP_DENYWRITE was set but the object specified by fd is open for writing."); break; } } struct block { struct block_metadata_t { blt::size_t allocated_objects = 0; block* next = nullptr; block* prev = nullptr; blt::u8* offset = nullptr; } metadata; blt::u8 buffer[BLOCK_SIZE - sizeof(metadata)]{}; block() { metadata.offset = buffer; } }; // remaining space inside the block after accounting for the metadata static constexpr blt::size_t BLOCK_REMAINDER = BLOCK_SIZE - sizeof(typename block::block_metadata_t); block* base = nullptr; block* head = nullptr; /** * Handles the allocation of the bytes for the block. * This function will either use mmap to allocate huge pages if requested * or use std::align_alloc to create an aligned allocation * @return pointer to a constructed block */ block* allocate_block() { block* buffer; #ifdef __unix__ if constexpr (USE_HUGE) { static_assert((BLOCK_SIZE & (HUGE_PAGE_SIZE - 1)) == 0 && "Must be multiple of the huge page size!"); buffer = static_cast(mmap(nullptr, BLOCK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0)); // if we fail to allocate a huge page we can try to allocate normally if (buffer == MAP_FAILED) { if constexpr (WARN_ON_FAIL) { BLT_WARN_STREAM << "We failed to allocate huge pages\n"; handle_mmap_error(BLT_WARN_STREAM); BLT_WARN_STREAM << "\033[1;31mYou should attempt to enable " "huge pages as this will allocate normal pages and double the memory usage!\033[22m\n"; } blt::size_t bytes = BLOCK_SIZE * 2; buffer = static_cast(mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0)); if (buffer == MAP_FAILED) { BLT_ERROR_STREAM << "Failed to allocate normal pages\n"; handle_mmap_error(BLT_ERROR_STREAM); throw std::bad_alloc(); } if constexpr (WARN_ON_FAIL) { if (((size_t) buffer & (HUGE_PAGE_SIZE - 1)) != 0) BLT_ERROR("Pointer is not aligned! %p", buffer); } auto* ptr = static_cast(buffer); auto ptr_size = reinterpret_cast(ptr); buffer = static_cast(std::align(BLOCK_SIZE, BLOCK_SIZE, ptr, bytes)); if constexpr (WARN_ON_FAIL) BLT_ERROR("Offset by %ld pages, resulting: %p", (reinterpret_cast(buffer) - ptr_size) / 4096, buffer); } } else buffer = reinterpret_cast(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); #else buffer = reinterpret_cast(std::aligned_alloc(BLOCK_SIZE, BLOCK_SIZE)); #endif construct(buffer); #ifndef BLT_DISABLE_STATS stats.incrementBlocks(); #endif return buffer; } /** * Allocates a new block and pushes it to the front of the linked listed */ void allocate_forward() { auto* block = allocate_block(); if (head == nullptr) { base = head = block; return; } block->metadata.prev = head; head->metadata.next = block; head = block; } /** * handles the actual allocation and alignment of memory * @param bytes number of bytes to allocate * @param alignment alignment required * @return aligned pointer */ void* allocate_bytes(blt::size_t bytes, blt::size_t alignment) { if (head == nullptr) return nullptr; blt::size_t remaining_bytes = BLOCK_REMAINDER - static_cast(head->metadata.offset - head->buffer); auto pointer = static_cast(head->metadata.offset); return std::align(alignment, bytes, pointer, remaining_bytes); } /** * allocate an object starting from the next available address * @tparam T type to allocate for * @param count number of elements to allocate * @return nullptr if the object could not be allocated, pointer to the object if it could, pointer to the start if count != 1 */ template T* allocate_object(blt::size_t count) { blt::size_t bytes = sizeof(T) * count; const auto aligned_address = allocate_bytes(bytes, alignof(T)); if (aligned_address != nullptr) { head->metadata.allocated_objects++; head->metadata.offset = static_cast(aligned_address) + bytes; } return static_cast(aligned_address); } /** * Frees a block * @param p pointer to the block to free */ inline void delete_block(block* p) { #ifndef BLT_DISABLE_STATS stats.decrementBlocks(); #endif if constexpr (USE_HUGE) { if (munmap(p, BLOCK_SIZE)) { BLT_ERROR_STREAM << "FAILED TO DEALLOCATE BLOCK\n"; handle_mmap_error(BLT_ERROR_STREAM); } } else free(p); } public: bump_allocator() = default; /** * Takes an unused size parameter. Purely used for compatibility with the old bump_allocator */ explicit bump_allocator(blt::size_t) {} /** * Allocate bytes for a type * @tparam T type to allocate * @param count number of elements to allocate for * @throws std::bad_alloc * @return aligned pointer to the beginning of the allocated memory */ template [[nodiscard]] T* allocate(blt::size_t count = 1) { if constexpr (sizeof(T) > BLOCK_REMAINDER) throw std::bad_alloc(); #ifndef BLT_DISABLE_STATS stats.incrementBytes(sizeof(T) * count); #endif T* ptr = allocate_object(count); if (ptr != nullptr) return ptr; allocate_forward(); ptr = allocate_object(count); if (ptr == nullptr) throw std::bad_alloc(); return ptr; } /** * Deallocate a pointer, does not call the destructor * @tparam T type of pointer * @param p pointer to deallocate */ template void deallocate(T* p, blt::size_t count = 1) { if (p == nullptr) return; #ifndef BLT_DISABLE_STATS stats.decrementBytes(sizeof(T) * count); #endif auto blk = to_block(p); if (--blk->metadata.allocated_objects == 0) { if (blk == base) base = head = nullptr; if (blk->metadata.prev != nullptr) blk->metadata.prev->metadata.next = blk->metadata.next; delete_block(blk); } } /** * allocate a type then call its constructor with arguments * @tparam T type to construct * @tparam Args type of args to construct with * @param args args to construct with * @return aligned pointer to the constructed type */ template [[nodiscard]] T* emplace(Args&& ... args) { const auto allocated_memory = allocate(); return new(allocated_memory) T{std::forward(args)...}; } /** * allocate an array of count T with argument(s) args and call T's constructor * @tparam T class to construct * @tparam Args argument types to supply to construction * @param count size of the array to allocate in number of elements. Note calling this with count = 0 is equivalent to calling emplace * @param args the args to supply to construction * @return aligned pointer to the beginning of the array of T */ template [[nodiscard]] T* emplace_many(blt::size_t count, Args&& ... args) { if (count == 0) return nullptr; const auto allocated_memory = allocate(count); for (blt::size_t i = 0; i < count; i++) new(allocated_memory + i) T{std::forward(args)...}; return allocated_memory; } /** * Used to construct a class U with parameters Args * @tparam U class to construct * @tparam Args args to use * @param p pointer to non-constructed memory * @param args list of arguments to build the class with */ template inline void construct(U* p, Args&& ... args) { ::new((void*) p) U(std::forward(args)...); } /** * Call the destructor for class U with pointer p * @tparam U class to call destructor on, this will not do anything if the type is std::trivially_destructible * @param p */ template inline void destroy(U* p) { if constexpr (!std::is_trivially_destructible_v) { if (p != nullptr) p->~U(); } } /** * Calls destroy on pointer p * Then calls deallocate on p * @tparam U class to destroy * @param p pointer to deallocate */ template inline void destruct(U* p) { destroy(p); deallocate(p); } inline const auto& getStats() { return stats; } ~bump_allocator() { block* next = base; while (next != nullptr) { auto* after = next->metadata.next; delete_block(next); next = after; } } }; } #define BLT_ALLOCATOR_H #endif //BLT_ALLOCATOR_H