From 631c67dc18886c4ffa8aaec85fe8cf01cf9e453b Mon Sep 17 00:00:00 2001
From: Brett
Date: Tue, 6 Aug 2024 02:46:01 -0400
Subject: [PATCH] very nice solution: no longer do we need to copy via stacks;
 we now cache pointers instead

---
 CMakeLists.txt         |   2 +-
 include/blt/gp/stack.h | 221 ++++++++++++++++++++++++-----------------
 src/transformers.cpp   |  99 ++++++++++++---------
 3 files changed, 188 insertions(+), 134 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 908a350..9dd4d02 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.25)
-project(blt-gp VERSION 0.0.116)
+project(blt-gp VERSION 0.0.117)
 
 include(CTest)
 
diff --git a/include/blt/gp/stack.h b/include/blt/gp/stack.h
index ce92a36..cb3bcf2 100644
--- a/include/blt/gp/stack.h
+++ b/include/blt/gp/stack.h
@@ -143,51 +143,11 @@ namespace blt::gp
                     BLT_WARN("This stack is empty, we will copy no bytes from it!");
                     return;
                 }
-                auto start_block = stack.head;
-                auto bytes_left = static_cast<blt::ptrdiff_t>(bytes);
-                blt::u8* start_point = nullptr;
-                while (bytes_left > 0)
-                {
-                    if (start_block == nullptr)
-                    {
-                        BLT_WARN("This stack doesn't contain enough space to copy %ld bytes!", bytes);
-                        BLT_WARN_STREAM << "State: " << size() << "\n";
-                        BLT_ABORT("Stack doesn't contain enough data for this copy operation!");
-                    }
-                    if (start_block->used_bytes_in_block() < bytes_left)
-                    {
-                        bytes_left -= start_block->used_bytes_in_block();
-                        start_block = start_block->metadata.prev;
-                    } else if (start_block->used_bytes_in_block() == bytes_left)
-                    {
-                        start_point = start_block->buffer;
-                        break;
-                    } else
-                    {
-                        start_point = start_block->metadata.offset - bytes_left;
-                        break;
-                    }
-                }
+                auto [start_block, bytes_left, start_point] = get_start_from_bytes(stack, bytes);
+
                 if (bytes_left > 0)
                 {
-                    auto insert = head;
-                    while (insert != nullptr)
-                    {
-                        if (insert->remaining_bytes_in_block() >= bytes_left)
-                            break;
-                        insert = insert->metadata.next;
-                    }
-                    // can directly copy into a block. this stack's head is now the insert point
-                    if (insert != nullptr && insert->remaining_bytes_in_block() >= bytes_left)
-                        head = insert;
-                    else
-                    {
-                        // need to push a block to the end.
-                        // make sure head is at the end.
-                        while (head != nullptr && head->metadata.next != nullptr)
-                            head = head->metadata.next;
-                        push_block(bytes_left);
-                    }
+                    allocate_block_to_head_for_size(bytes_left);
                     std::memcpy(head->metadata.offset, start_point, bytes_left);
                     head->metadata.offset += bytes_left;
                     start_block = start_block->metadata.next;
@@ -195,27 +155,44 @@
                 // we now copy whole blocks at a time.
                 while (start_block != nullptr)
                 {
-                    auto prev = head;
-                    auto insert = head;
-                    while (insert != nullptr)
-                    {
-                        if (insert->remaining_bytes_in_block() >= start_block->used_bytes_in_block())
-                            break;
-                        prev = insert;
-                        insert = insert->metadata.next;
-                    }
-                    if (insert == nullptr)
-                    {
-                        head = prev;
-                        push_block(start_block->used_bytes_in_block());
-                    } else
-                        head = insert;
+                    allocate_block_to_head_for_size(start_block->used_bytes_in_block());
                     std::memcpy(head->metadata.offset, start_block->buffer, start_block->used_bytes_in_block());
                     head->metadata.offset += start_block->used_bytes_in_block();
                     start_block = start_block->metadata.next;
                 }
             }
 
+            void copy_from(blt::u8* data, blt::size_t bytes)
+            {
+                if (bytes == 0 || data == nullptr)
+                    return;
+                allocate_block_to_head_for_size(bytes);
+                std::memcpy(head->metadata.offset, data, bytes);
+                head->metadata.offset += bytes;
+            }
+
+            void copy_to(blt::u8* data, blt::size_t bytes) const
+            {
+                if (bytes == 0 || data == nullptr)
+                    return;
+                auto [start_block, bytes_left, start_point] = get_start_from_bytes(*this, bytes);
+
+                blt::size_t write_point = 0;
+                if (bytes_left > 0)
+                {
+                    std::memcpy(data + write_point, start_point, bytes_left);
+                    write_point += bytes_left;
+                    start_block = start_block->metadata.next;
+                }
+                // we now copy whole blocks at a time.
+                while (start_block != nullptr)
+                {
+                    std::memcpy(data + write_point, start_block->buffer, start_block->used_bytes_in_block());
+                    write_point += start_block->used_bytes_in_block();
+                    start_block = start_block->metadata.next;
+                }
+            }
+
             /**
              * Pushes an instance of an object on to the stack
              * @tparam T type to push
@@ -294,8 +271,21 @@
 
             void pop_bytes(blt::ptrdiff_t bytes)
             {
+                if (bytes == 0)
+                    return;
+                if (empty())
+                {
+                    BLT_WARN("Cannot pop %ld bytes", bytes);
+                    BLT_ABORT("Stack is empty, we cannot pop!");
+                }
                 while (bytes > 0)
                 {
+                    if (head == nullptr)
+                    {
+                        BLT_WARN("The head is null, this stack doesn't contain enough data inside to pop %ld bytes!", bytes);
+                        BLT_WARN_STREAM << "Stack State: " << size() << "\n";
+                        BLT_ABORT("Stack doesn't contain enough data to perform a pop!");
+                    }
                     auto diff = head->used_bytes_in_block() - bytes;
                     // if there is not enough room left to pop completely off the block, then move to the next previous block
                     // and pop from it, update the amount of bytes to reflect the amount removed from the current block
@@ -351,7 +341,7 @@
                     ()), ...);
             }
 
-            [[nodiscard]] bool empty() const
+            [[nodiscard]] bool empty() const noexcept
             {
                 if (head == nullptr)
                     return true;
@@ -360,7 +350,7 @@
                 return head->used_bytes_in_block() == 0;
             }
 
-            [[nodiscard]] blt::ptrdiff_t bytes_in_head() const
+            [[nodiscard]] blt::ptrdiff_t bytes_in_head() const noexcept
             {
                 if (head == nullptr)
                     return 0;
@@ -371,7 +361,7 @@
              * Warning this function is slow!
              * @return the size of the stack allocator in bytes
              */
-            [[nodiscard]] size_data_t size() const
+            [[nodiscard]] size_data_t size() const noexcept
             {
                 size_data_t size_data;
                 auto* prev = head;
@@ -404,7 +394,7 @@
 
             // TODO: cleanup this allocator!
             // if you keep track of type size information you can memcpy between stack allocators, as you already only allow trivially copyable types
 
-            stack_allocator(const stack_allocator& copy)
+            stack_allocator(const stack_allocator& copy) noexcept
             {
                 if (copy.empty())
                     return;
@@ -444,7 +434,7 @@
                 return *this;
             }
 
-            ~stack_allocator()
+            ~stack_allocator() noexcept
             {
                 if (head != nullptr)
                 {
@@ -470,27 +460,27 @@
                 return (size + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
             }
 
-            inline static constexpr auto metadata_size()
+            inline static constexpr auto metadata_size() noexcept
             {
                 return sizeof(typename block::block_metadata_t);
             }
 
-            inline static constexpr auto block_size()
+            inline static constexpr auto block_size() noexcept
             {
                 return sizeof(block);
             }
 
-            inline static constexpr auto page_size()
+            inline static constexpr auto page_size() noexcept
             {
                 return PAGE_SIZE;
             }
 
-            inline static constexpr auto page_size_no_meta()
+            inline static constexpr auto page_size_no_meta() noexcept
             {
                 return page_size() - metadata_size();
            }
 
-            inline static constexpr auto page_size_no_block()
+            inline static constexpr auto page_size_no_block() noexcept
             {
                 return page_size() - block_size();
             }
@@ -507,7 +497,7 @@
                 } metadata;
                 blt::u8 buffer[8]{};
 
-                explicit block(blt::size_t size)
+                explicit block(blt::size_t size) noexcept
                 {
 #if BLT_DEBUG_LEVEL > 0
                     if (size < PAGE_SIZE)
@@ -520,27 +510,34 @@
                     metadata.offset = buffer;
                 }
 
-                void reset()
+                void reset() noexcept
                 {
                     metadata.offset = buffer;
                 }
 
-                [[nodiscard]] blt::ptrdiff_t storage_size() const
+                [[nodiscard]] blt::ptrdiff_t storage_size() const noexcept
                 {
                     return static_cast<blt::ptrdiff_t>(metadata.size - sizeof(typename block::block_metadata_t));
                 }
 
-                [[nodiscard]] blt::ptrdiff_t used_bytes_in_block() const
+                [[nodiscard]] blt::ptrdiff_t used_bytes_in_block() const noexcept
                 {
                     return static_cast<blt::ptrdiff_t>(metadata.offset - buffer);
                 }
 
-                [[nodiscard]] blt::ptrdiff_t remaining_bytes_in_block() const
+                [[nodiscard]] blt::ptrdiff_t remaining_bytes_in_block() const noexcept
                 {
                     return storage_size() - used_bytes_in_block();
                 }
             };
 
+            struct copy_start_point
+            {
+                block* start_block;
+                blt::ptrdiff_t bytes_left;
+                blt::u8* start_point;
+            };
+
             template<typename T>
             void* allocate_bytes()
             {
@@ -551,25 +548,35 @@
             {
                 auto ptr = get_aligned_pointer(size);
                 if (ptr == nullptr)
-                {
-                    while (head != nullptr && head->metadata.next != nullptr)
-                    {
-                        head = head->metadata.next;
-                        if (head != nullptr)
-                            head->reset();
-                        if (head->remaining_bytes_in_block() >= static_cast<blt::ptrdiff_t>(size))
-                            break;
-                    }
-                    if (head == nullptr || head->remaining_bytes_in_block() < static_cast<blt::ptrdiff_t>(size))
-                        push_block(aligned_size(size) + sizeof(typename block::block_metadata_t));
-                }
+                    allocate_block_to_head_for_size(aligned_size(size));
                 ptr = get_aligned_pointer(size);
                 if (ptr == nullptr)
                     throw std::bad_alloc();
                 return ptr;
             }
 
-            void* get_aligned_pointer(blt::size_t bytes)
+            /**
+             * Moves forward through the list of "deallocated" blocks; if none meet the size requirement, a new block is allocated.
+             * This function accounts for the size of the block metadata but requires the size input to already be aligned;
+             * it performs no modification to the size value.
+             *
+             * After this call, a block with enough room for size bytes is at head.
+             */
+            void allocate_block_to_head_for_size(const blt::size_t size) noexcept
+            {
+                while (head != nullptr && head->metadata.next != nullptr)
+                {
+                    head = head->metadata.next;
+                    if (head != nullptr)
+                        head->reset();
+                    if (head->remaining_bytes_in_block() >= static_cast<blt::ptrdiff_t>(size))
+                        break;
+                }
+                if (head == nullptr || head->remaining_bytes_in_block() < static_cast<blt::ptrdiff_t>(size))
+                    push_block(size + sizeof(typename block::block_metadata_t));
+            }
+
+            void* get_aligned_pointer(blt::size_t bytes) noexcept
             {
                 if (head == nullptr)
                     return nullptr;
@@ -578,7 +585,7 @@
                 return std::align(MAX_ALIGNMENT, bytes, pointer, remaining_bytes);
             }
 
-            void push_block(blt::size_t size)
+            void push_block(blt::size_t size) noexcept
             {
                 auto blk = allocate_block(size);
                 if (head == nullptr)
@@ -591,13 +598,13 @@
                 head = blk;
             }
 
-            static size_t to_nearest_page_size(blt::size_t bytes)
+            static size_t to_nearest_page_size(blt::size_t bytes) noexcept
             {
                 constexpr static blt::size_t MASK = ~(PAGE_SIZE - 1);
                 return (bytes & MASK) + PAGE_SIZE;
             }
 
-            static block* allocate_block(blt::size_t bytes)
+            static block* allocate_block(blt::size_t bytes) noexcept
             {
                 auto size = to_nearest_page_size(bytes);
                 auto* data = std::aligned_alloc(PAGE_SIZE, size);
@@ -606,7 +613,7 @@
                 return reinterpret_cast<block*>(data);
             }
 
-            static void free_chain(block* current)
+            static void free_chain(block* current) noexcept
             {
                 while (current != nullptr)
                 {
@@ -617,12 +624,12 @@
                 }
             }
 
-            static void free_block(block* ptr)
+            static void free_block(block* ptr) noexcept
             {
                 std::free(ptr);
            }
 
-            inline bool move_back()
+            inline bool move_back() noexcept
             {
                 auto old = head;
                 head = head->metadata.prev;
@@ -633,6 +640,36 @@
                 }
                 return true;
             }
+
+            [[nodiscard]] inline static copy_start_point get_start_from_bytes(const stack_allocator& stack, blt::size_t bytes)
+            {
+                auto start_block = stack.head;
+                auto bytes_left = static_cast<blt::ptrdiff_t>(bytes);
+                blt::u8* start_point = nullptr;
+                while (bytes_left > 0)
+                {
+                    if (start_block == nullptr)
+                    {
+                        BLT_WARN("This stack doesn't contain enough space to copy %ld bytes!", bytes);
+                        BLT_WARN_STREAM << "State: " << stack.size() << "\n";
+                        BLT_ABORT("Stack doesn't contain enough data for this copy operation!");
+                    }
+                    if (start_block->used_bytes_in_block() < bytes_left)
+                    {
+                        bytes_left -= start_block->used_bytes_in_block();
+                        start_block = start_block->metadata.prev;
+                    } else if (start_block->used_bytes_in_block() == bytes_left)
+                    {
+                        start_point = start_block->buffer;
+                        break;
+                    } else
+                    {
+                        start_point = start_block->metadata.offset - bytes_left;
+                        break;
+                    }
+                }
+                return copy_start_point{start_block, bytes_left, start_point};
+            }
 
         private:
             block* head = nullptr;
diff --git a/src/transformers.cpp b/src/transformers.cpp
index f3aaddf..3b8a96b 100644
--- a/src/transformers.cpp
+++ b/src/transformers.cpp
@@ -19,13 +19,39 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 
 namespace blt::gp
 {
+
+    inline blt::size_t accumulate_type_sizes(detail::op_iter begin, detail::op_iter end)
+    {
+        blt::size_t total = 0;
+        for (auto it = begin; it != end; ++it)
+        {
+            if (it->is_value)
+                total += stack_allocator::aligned_size(it->type_size);
+        }
+        return total;
+    }
+
+    template<typename T>
+    blt::u8* get_thread_pointer_for_size(blt::size_t bytes)
+    {
+        static thread_local blt::expanding_buffer<blt::u8> buffer;
+        if (bytes > buffer.size())
+            buffer.resize(bytes);
+        return buffer.data();
+    }
+
     grow_generator_t grow_generator;
 
+    mutation_t::config_t::config_t(): generator(grow_generator)
+    {}
+
     blt::expected
     crossover_t::apply(gp_program& program, const tree_t& p1, const tree_t& p2) // NOLINT
     {
         result_t result{p1, p2};
@@ -50,8 +76,8 @@
         auto found_point_begin_itr = c2_ops.begin() + point->p2_crossover_point;
         auto found_point_end_itr = c2_ops.begin() + find_endpoint(program, c2_ops, point->p2_crossover_point);
 
-        stack_allocator& c1_stack_init = c1.get_values();
-        stack_allocator& c2_stack_init = c2.get_values();
+        stack_allocator& c1_stack = c1.get_values();
+        stack_allocator& c2_stack = c2.get_values();
 
         // we have to make a copy because we will modify the underlying storage.
         std::vector<op_container_t> c1_operators;
@@ -62,25 +88,27 @@
         for (const auto& op : blt::iterate(found_point_begin_itr, found_point_end_itr))
             c2_operators.push_back(op);
 
-        stack_allocator c1_stack_after_copy;
-        stack_allocator c1_stack_for_copy;
-        stack_allocator c2_stack_after_copy;
-        stack_allocator c2_stack_for_copy;
+        blt::size_t c1_stack_after_bytes = accumulate_type_sizes(crossover_point_end_itr, c1_ops.end());
+        blt::size_t c1_stack_for_bytes = accumulate_type_sizes(crossover_point_begin_itr, crossover_point_end_itr);
+        blt::size_t c2_stack_after_bytes = accumulate_type_sizes(found_point_end_itr, c2_ops.end());
+        blt::size_t c2_stack_for_bytes = accumulate_type_sizes(found_point_begin_itr, found_point_end_itr);
+        auto c1_total = static_cast<blt::ptrdiff_t>(c1_stack_after_bytes + c1_stack_for_bytes);
+        auto c2_total = static_cast<blt::ptrdiff_t>(c2_stack_after_bytes + c2_stack_for_bytes);
+        auto copy_ptr_c1 = get_thread_pointer_for_size(c1_total);
+        auto copy_ptr_c2 = get_thread_pointer_for_size(c2_total);
 
-        // transfer all values after the crossover point. these will need to be transferred back to child2
-        transfer_backward(c1_stack_init, c1_stack_after_copy, c1_ops.end() - 1, crossover_point_end_itr - 1);
-        // transfer all values for the crossover point.
-        transfer_backward(c1_stack_init, c1_stack_for_copy, crossover_point_end_itr - 1, crossover_point_begin_itr - 1);
-        // transfer child2 values for copying back into c1
-        transfer_backward(c2_stack_init, c2_stack_after_copy, c2_ops.end() - 1, found_point_end_itr - 1);
-        transfer_backward(c2_stack_init, c2_stack_for_copy, found_point_end_itr - 1, found_point_begin_itr - 1);
-        // now copy back into the respective children
-        transfer_forward(c2_stack_for_copy, c1.get_values(), found_point_begin_itr, found_point_end_itr);
-        transfer_forward(c1_stack_for_copy, c2.get_values(), crossover_point_begin_itr, crossover_point_end_itr);
-        // now copy after the crossover point back to the correct children
-        transfer_forward(c1_stack_after_copy, c1.get_values(), crossover_point_end_itr, c1_ops.end());
-        transfer_forward(c2_stack_after_copy, c2.get_values(), found_point_end_itr, c2_ops.end());
+        c1_stack.copy_to(copy_ptr_c1, c1_total);
+        c1_stack.pop_bytes(c1_total);
+        c2_stack.copy_to(copy_ptr_c2, c2_total);
+        c2_stack.pop_bytes(c2_total);
+
+        c2_stack.copy_from(copy_ptr_c1, c1_stack_for_bytes);
+        c2_stack.copy_from(copy_ptr_c2 + c2_stack_for_bytes, c2_stack_after_bytes);
+
+        c1_stack.copy_from(copy_ptr_c2, c2_stack_for_bytes);
+        c1_stack.copy_from(copy_ptr_c1 + c1_stack_for_bytes, c1_stack_after_bytes);
+
         // now swap the operators
         auto insert_point_c1 = crossover_point_begin_itr - 1;
         auto insert_point_c2 = found_point_begin_itr - 1;
@@ -195,26 +223,18 @@
         auto& new_ops_r = new_tree.get_operations();
         auto& new_vals_r = new_tree.get_values();
 
-        stack_allocator stack_after;
-        blt::size_t total_bytes_after = 0;
-        for (auto it = end_itr; it != ops_r.end(); it++)
-        {
-            if (it->is_value)
-                total_bytes_after += stack_allocator::aligned_size(it->type_size);
-        }
-//        transfer_backward(vals_r, stack_after, ops_r.end() - 1, end_itr - 1);
-        stack_after.copy_from(vals_r, total_bytes_after);
-        vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(total_bytes_after));
-        for (auto it = end_itr - 1; it != begin_itr - 1; it--)
-        {
-            if (it->is_value)
-                vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(stack_allocator::aligned_size(it->type_size)));
-        }
+        blt::size_t total_bytes_after = accumulate_type_sizes(end_itr, ops_r.end());
+        auto* stack_after_data = get_thread_pointer_for_size(total_bytes_after);
+        // make a copy of any stack data after the mutation point / children.
+        vals_r.copy_to(stack_after_data, total_bytes_after);
+
+        // remove the bytes of the data after the mutation point and the data for the children of the mutation node.
+        vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(total_bytes_after + accumulate_type_sizes(begin_itr, end_itr)));
+
+        // insert the new tree then move back the data from after the original mutation point.
         vals_r.insert(std::move(new_vals_r));
-        //transfer_forward(stack_after, vals_r, end_itr, ops_r.end());
-        vals_r.copy_from(stack_after, total_bytes_after);
-        stack_after = {};
+        vals_r.copy_from(stack_after_data, total_bytes_after);
 
         auto before = begin_itr - 1;
         ops_r.erase(begin_itr, end_itr);
@@ -223,7 +243,7 @@
         // this will check to make sure that the tree is in a correct and executable state. it requires that the evaluation is context free!
 #if BLT_DEBUG_LEVEL >= 2
         BLT_ASSERT(new_vals_r.empty());
-        BLT_ASSERT(stack_after.empty());
+        //BLT_ASSERT(stack_after.empty());
         blt::size_t bytes_expected = 0;
         auto bytes_size = vals_r.size().total_used_bytes;
@@ -267,9 +287,6 @@
         return c;
     }
 
-    mutation_t::config_t::config_t(): generator(grow_generator)
-    {}
-
     blt::ptrdiff_t find_endpoint(blt::gp::gp_program& program, const std::vector<op_container_t>& container, blt::ptrdiff_t index)
     {
         blt::i64 children_left = 0;
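
A few sketches of what this patch is doing follow; they use simplified stand-in types, not the library's API. The crossover hunk replaces eight transfer_backward/transfer_forward calls and four temporary stack_allocator instances with two flat snapshots: each child's bytes above the crossover point are flattened out with copy_to, popped, then spliced back with copy_from, with the "for" segment (the subtree being exchanged) sitting below the "after" segment in each snapshot. A minimal sketch of that byte choreography, with std::vector<std::uint8_t> standing in for stack_allocator and the scratch buffers:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

using byte_stack = std::vector<std::uint8_t>;

// pop_bytes analogue: discard the top `bytes` bytes.
void pop_bytes(byte_stack& s, std::size_t bytes)
{
    s.resize(s.size() - bytes);
}

// copy_from analogue: push raw bytes on top of the stack.
void copy_from(byte_stack& s, const std::uint8_t* data, std::size_t bytes)
{
    s.insert(s.end(), data, data + bytes);
}

// Swap the crossover segments of two children. Each stack is laid out
// bottom-to-top as [ prefix | for | after ]; copy_to on the real allocator
// flattens the top (for + after) bytes in that same order.
void crossover_swap(byte_stack& c1, byte_stack& c2,
                    std::size_t c1_for, std::size_t c1_after,
                    std::size_t c2_for, std::size_t c2_after)
{
    // copy_to analogue: snapshot the top bytes of each child.
    byte_stack buf1(c1.end() - static_cast<std::ptrdiff_t>(c1_for + c1_after), c1.end());
    byte_stack buf2(c2.end() - static_cast<std::ptrdiff_t>(c2_for + c2_after), c2.end());
    pop_bytes(c1, c1_for + c1_after);
    pop_bytes(c2, c2_for + c2_after);

    // c2 receives c1's crossover segment, then its own trailing bytes back.
    copy_from(c2, buf1.data(), c1_for);
    copy_from(c2, buf2.data() + c2_for, c2_after);

    // c1 receives c2's crossover segment, then its own trailing bytes back.
    copy_from(c1, buf2.data(), c2_for);
    copy_from(c1, buf1.data() + c1_for, c1_after);
}

int main()
{
    byte_stack c1{'P', 'A', 'A', 'x', 'y'}; // prefix P, for AA, after xy
    byte_stack c2{'Q', 'B', 'B', 'B', 'z'}; // prefix Q, for BBB, after z
    crossover_swap(c1, c2, 2, 2, 3, 1);
    assert((c1 == byte_stack{'P', 'B', 'B', 'B', 'x', 'y'}));
    assert((c2 == byte_stack{'Q', 'A', 'A', 'z'}));
}

Because both snapshots are taken before any copy_from runs, source and destination never alias. The cost is one staging copy per child, which is the same trade the patch makes, except its staging buffers are reused across calls.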
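get_thread_pointer_for_size is the caching-pointer half of the commit message: a grow-only, thread_local scratch buffer that amortizes allocation across repeated crossover/mutation calls. Note that the template parameter list (and any explicit template arguments at the call sites) did not survive the angle-bracket stripping in this copy of the patch; the <typename T> restored above is a placeholder, and a plausible reading is a tag parameter that gives each call site its own instantiation, hence its own buffer — which the crossover path needs, since copy_ptr_c1 and copy_ptr_c2 are live simultaneously. A sketch of the pattern with std::vector standing in for blt::expanding_buffer:

#include <cstddef>
#include <cstdint>
#include <vector>

// One grow-only scratch buffer per thread (and, in the patch, presumably per
// template instantiation). Repeated calls stop hitting the allocator once the
// buffer reaches its high-water mark, and threads never contend.
std::uint8_t* get_scratch_for_size(std::size_t bytes)
{
    static thread_local std::vector<std::uint8_t> buffer;
    if (bytes > buffer.size())
        buffer.resize(bytes); // grows, never shrinks
    return buffer.data();
}

The returned pointer aliases the same storage on every call from a given thread (per instantiation), so two results that must stay live together have to come from different buffers.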
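One detail worth checking: aligned_size uses the classic round-up trick, (size + (A - 1)) & ~(A - 1), which is only valid when A is a power of two. A quick compile-time check; MAX_ALIGNMENT = 16 is assumed here purely for illustration, the header defines its own value:

#include <cstddef>
#include <cstdio>

constexpr std::size_t MAX_ALIGNMENT = 16; // assumption for illustration

constexpr std::size_t aligned_size(std::size_t size)
{
    return (size + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
}

// already-aligned sizes are unchanged; everything else rounds up
static_assert(aligned_size(1) == 16);
static_assert(aligned_size(16) == 16);
static_assert(aligned_size(17) == 32);

int main()
{
    std::printf("aligned_size(24) = %zu\n", aligned_size(24)); // prints 32
}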