Very nice solution: we no longer need to copy values through temporary stacks; cached thread-local buffer pointers are used instead.

thread
Brett 2024-08-06 02:46:01 -04:00
parent 3972a70bc5
commit 631c67dc18
3 changed files with 188 additions and 134 deletions

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25)
project(blt-gp VERSION 0.0.116)
project(blt-gp VERSION 0.0.117)
include(CTest)

View File

@ -143,51 +143,11 @@ namespace blt::gp
BLT_WARN("This stack is empty, we will copy no bytes from it!");
return;
}
auto start_block = stack.head;
auto bytes_left = static_cast<blt::ptrdiff_t>(bytes);
blt::u8* start_point = nullptr;
while (bytes_left > 0)
{
if (start_block == nullptr)
{
BLT_WARN("This stack doesn't contain enough space to copy %ld bytes!", bytes);
BLT_WARN_STREAM << "State: " << size() << "\n";
BLT_ABORT("Stack doesn't contain enough data for this copy operation!");
}
if (start_block->used_bytes_in_block() < bytes_left)
{
bytes_left -= start_block->used_bytes_in_block();
start_block = start_block->metadata.prev;
} else if (start_block->used_bytes_in_block() == bytes_left)
{
start_point = start_block->buffer;
break;
} else
{
start_point = start_block->metadata.offset - bytes_left;
break;
}
}
auto [start_block, bytes_left, start_point] = get_start_from_bytes(stack, bytes);
if (bytes_left > 0)
{
auto insert = head;
while (insert != nullptr)
{
if (insert->remaining_bytes_in_block() >= bytes_left)
break;
insert = insert->metadata.next;
}
// can directly copy into a block. this stack's head is now the insert point
if (insert != nullptr && insert->remaining_bytes_in_block() >= bytes_left)
head = insert;
else
{
// need to push a block to the end.
// make sure head is at the end.
while (head != nullptr && head->metadata.next != nullptr)
head = head->metadata.next;
push_block(bytes_left);
}
allocate_block_to_head_for_size(bytes_left);
std::memcpy(head->metadata.offset, start_point, bytes_left);
head->metadata.offset += bytes_left;
start_block = start_block->metadata.next;
@ -195,27 +155,44 @@ namespace blt::gp
// we now copy whole blocks at a time.
while (start_block != nullptr)
{
auto prev = head;
auto insert = head;
while (insert != nullptr)
{
if (insert->remaining_bytes_in_block() >= start_block->used_bytes_in_block())
break;
prev = insert;
insert = insert->metadata.next;
}
if (insert == nullptr)
{
head = prev;
push_block(start_block->used_bytes_in_block());
} else
head = insert;
allocate_block_to_head_for_size(start_block->used_bytes_in_block());
std::memcpy(head->metadata.offset, start_block->buffer, start_block->used_bytes_in_block());
head->metadata.offset += start_block->used_bytes_in_block();
start_block = start_block->metadata.next;
}
}
/**
 * Appends `bytes` raw bytes read from `data` to the top of this stack.
 * The call is a no-op when `data` is null or `bytes` is zero.
 *
 * @param data  source buffer to read from
 * @param bytes number of bytes to append
 */
void copy_from(blt::u8* data, blt::size_t bytes)
{
    if (data == nullptr || bytes == 0)
        return;
    // make head a block able to hold `bytes` (reusing a later block or pushing a new one)
    allocate_block_to_head_for_size(bytes);
    auto* const destination = head->metadata.offset;
    std::memcpy(destination, data, bytes);
    // advance the write cursor past the freshly copied bytes
    head->metadata.offset = destination + bytes;
}
/**
 * Copies the top `bytes` bytes of this stack into `data` without modifying the stack.
 * The call is a no-op when `data` is null or `bytes` is zero. If the stack holds fewer than
 * `bytes` bytes, get_start_from_bytes reports the problem through BLT_ABORT.
 *
 * @param data  destination buffer; the caller must provide room for at least `bytes` bytes
 * @param bytes number of bytes to copy, counted back from the top of the stack
 */
void copy_to(blt::u8* data, blt::size_t bytes) const
{
    if (bytes == 0 || data == nullptr)
        return;
    auto [start_block, bytes_left, start_point] = get_start_from_bytes(*this, bytes);
    blt::size_t write_point = 0;
    if (bytes_left > 0)
    {
        // the first block is (possibly) only partially covered by the request
        std::memcpy(data + write_point, start_point, bytes_left);
        write_point += bytes_left;
        // Never walk past head: blocks linked after it are only reset when they are reused
        // (see allocate_block_to_head_for_size), so they may still report stale used bytes.
        start_block = (start_block == head) ? nullptr : start_block->metadata.next;
    }
    // every remaining block up to and including head is copied whole
    while (start_block != nullptr)
    {
        const auto used = start_block->used_bytes_in_block();
        std::memcpy(data + write_point, start_block->buffer, used);
        write_point += used;
        if (start_block == head)
            break;
        start_block = start_block->metadata.next;
    }
}
/**
* Pushes an instance of an object on to the stack
* @tparam T type to push
@ -294,8 +271,21 @@ namespace blt::gp
void pop_bytes(blt::ptrdiff_t bytes)
{
if (bytes == 0)
return;
if (empty())
{
BLT_WARN("Cannot pop %ld bytes", bytes);
BLT_ABORT("Stack is empty, we cannot pop!");
}
while (bytes > 0)
{
if (head == nullptr)
{
BLT_WARN("The head is null, this stack doesn't contain enough data inside to pop %ld bytes!", bytes);
BLT_WARN_STREAM << "Stack State: " << size() << "\n";
BLT_ABORT("Stack doesn't contain enough data to preform a pop!");
}
auto diff = head->used_bytes_in_block() - bytes;
// if there is not enough room left to pop completely off the block, then move to the next previous block
// and pop from it, update the amount of bytes to reflect the amount removed from the current block
@ -351,7 +341,7 @@ namespace blt::gp
()), ...);
}
[[nodiscard]] bool empty() const
[[nodiscard]] bool empty() const noexcept
{
if (head == nullptr)
return true;
@ -360,7 +350,7 @@ namespace blt::gp
return head->used_bytes_in_block() == 0;
}
[[nodiscard]] blt::ptrdiff_t bytes_in_head() const
[[nodiscard]] blt::ptrdiff_t bytes_in_head() const noexcept
{
if (head == nullptr)
return 0;
@ -371,7 +361,7 @@ namespace blt::gp
* Warning this function is slow!
* @return the size of the stack allocator in bytes
*/
[[nodiscard]] size_data_t size() const
[[nodiscard]] size_data_t size() const noexcept
{
size_data_t size_data;
auto* prev = head;
@ -404,7 +394,7 @@ namespace blt::gp
// TODO: cleanup this allocator!
// if you keep track of type size information you can memcpy between stack allocators as you already only allow trivially copyable types
stack_allocator(const stack_allocator& copy)
stack_allocator(const stack_allocator& copy) noexcept
{
if (copy.empty())
return;
@ -444,7 +434,7 @@ namespace blt::gp
return *this;
}
~stack_allocator()
~stack_allocator() noexcept
{
if (head != nullptr)
{
@ -470,27 +460,27 @@ namespace blt::gp
return (size + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
}
inline static constexpr auto metadata_size()
// Size in bytes of the per-block metadata header (block::block_metadata_t).
inline static constexpr auto metadata_size() noexcept
{
    const auto header_bytes = sizeof(typename block::block_metadata_t);
    return header_bytes;
}
inline static constexpr auto block_size()
// Size in bytes of the block struct itself, i.e. sizeof(block).
inline static constexpr auto block_size() noexcept
{
    const auto whole_block_bytes = sizeof(block);
    return whole_block_bytes;
}
inline static constexpr auto page_size()
// Exposes the PAGE_SIZE constant used when sizing blocks.
inline static constexpr auto page_size() noexcept
{
    const auto bytes_per_page = PAGE_SIZE;
    return bytes_per_page;
}
inline static constexpr auto page_size_no_meta()
// Bytes left in one page once the block metadata header has been subtracted.
inline static constexpr auto page_size_no_meta() noexcept
{
    const auto total = page_size();
    const auto header = metadata_size();
    return total - header;
}
inline static constexpr auto page_size_no_block()
// Bytes left in one page once sizeof(block) has been subtracted.
inline static constexpr auto page_size_no_block() noexcept
{
    const auto total = page_size();
    const auto whole_block = block_size();
    return total - whole_block;
}
@ -507,7 +497,7 @@ namespace blt::gp
} metadata;
blt::u8 buffer[8]{};
explicit block(blt::size_t size)
explicit block(blt::size_t size) noexcept
{
#if BLT_DEBUG_LEVEL > 0
if (size < PAGE_SIZE)
@ -520,27 +510,34 @@ namespace blt::gp
metadata.offset = buffer;
}
void reset()
// Rewinds the write cursor to the start of the buffer, marking the block as holding no bytes.
void reset() noexcept
{
    metadata.offset = &buffer[0];
}
[[nodiscard]] blt::ptrdiff_t storage_size() const
// Recorded block size (metadata.size) minus the size of the metadata header.
[[nodiscard]] blt::ptrdiff_t storage_size() const noexcept
{
    const auto payload_bytes = metadata.size - sizeof(typename block::block_metadata_t);
    return static_cast<blt::ptrdiff_t>(payload_bytes);
}
[[nodiscard]] blt::ptrdiff_t used_bytes_in_block() const
// Distance between the write cursor and the start of the buffer, i.e. bytes currently stored here.
[[nodiscard]] blt::ptrdiff_t used_bytes_in_block() const noexcept
{
    const auto consumed = metadata.offset - buffer;
    return static_cast<blt::ptrdiff_t>(consumed);
}
[[nodiscard]] blt::ptrdiff_t remaining_bytes_in_block() const
// Storage capacity of this block that has not been written to yet.
[[nodiscard]] blt::ptrdiff_t remaining_bytes_in_block() const noexcept
{
    const auto capacity = storage_size();
    const auto consumed = used_bytes_in_block();
    return capacity - consumed;
}
};
// Result of get_start_from_bytes: describes where a copy of the top N bytes of a stack begins.
// Member order matters: the struct is brace-initialised and unpacked with structured bindings.
struct copy_start_point
{
// block that contains the first byte to copy
block* start_block;
// number of bytes to copy out of start_block, starting at start_point
blt::ptrdiff_t bytes_left;
// address of the first byte to copy inside start_block (stays null when zero bytes were requested)
blt::u8* start_point;
};
template<typename T>
void* allocate_bytes()
{
@ -551,25 +548,35 @@ namespace blt::gp
{
auto ptr = get_aligned_pointer(size);
if (ptr == nullptr)
{
while (head != nullptr && head->metadata.next != nullptr)
{
head = head->metadata.next;
if (head != nullptr)
head->reset();
if (head->remaining_bytes_in_block() >= static_cast<blt::ptrdiff_t>(size))
break;
}
if (head == nullptr || head->remaining_bytes_in_block() < static_cast<blt::ptrdiff_t>(size))
push_block(aligned_size(size) + sizeof(typename block::block_metadata_t));
}
allocate_block_to_head_for_size(aligned_size(size));
ptr = get_aligned_pointer(size);
if (ptr == nullptr)
throw std::bad_alloc();
return ptr;
}
void* get_aligned_pointer(blt::size_t bytes)
/**
 * Makes `head` refer to a block with at least `size` free bytes.
 *
 * Blocks linked after the current head are visited in order; each visited block is reset and becomes
 * the new head, and the walk stops at the first one with enough room. When no such block exists
 * (or there is no head at all) a new block is pushed, sized as `size` plus the block metadata.
 *
 * `size` is used exactly as given, so callers must pass an already aligned value.
 */
void allocate_block_to_head_for_size(const blt::size_t size) noexcept
{
    const auto required = static_cast<blt::ptrdiff_t>(size);
    auto* candidate = (head == nullptr) ? nullptr : head->metadata.next;
    for (; candidate != nullptr; candidate = candidate->metadata.next)
    {
        // a block being reused starts out empty
        candidate->reset();
        head = candidate;
        if (candidate->remaining_bytes_in_block() >= required)
            return;
    }
    // either the chain is empty or its last block is too small
    if (head == nullptr || head->remaining_bytes_in_block() < required)
        push_block(size + sizeof(typename block::block_metadata_t));
}
void* get_aligned_pointer(blt::size_t bytes) noexcept
{
if (head == nullptr)
return nullptr;
@ -578,7 +585,7 @@ namespace blt::gp
return std::align(MAX_ALIGNMENT, bytes, pointer, remaining_bytes);
}
void push_block(blt::size_t size)
void push_block(blt::size_t size) noexcept
{
auto blk = allocate_block(size);
if (head == nullptr)
@ -591,13 +598,13 @@ namespace blt::gp
head = blk;
}
static size_t to_nearest_page_size(blt::size_t bytes)
/**
 * Rounds `bytes` down to a multiple of PAGE_SIZE and then adds one more page.
 *
 * NOTE(review): an input that is already an exact multiple of PAGE_SIZE still grows by a whole page;
 * confirm this slack is intended. The mask presumably relies on PAGE_SIZE being a power of two — TODO confirm.
 */
static size_t to_nearest_page_size(blt::size_t bytes) noexcept
{
    constexpr blt::size_t page_floor_mask = ~(PAGE_SIZE - 1);
    const blt::size_t floored = bytes & page_floor_mask;
    return floored + PAGE_SIZE;
}
static block* allocate_block(blt::size_t bytes)
static block* allocate_block(blt::size_t bytes) noexcept
{
auto size = to_nearest_page_size(bytes);
auto* data = std::aligned_alloc(PAGE_SIZE, size);
@ -606,7 +613,7 @@ namespace blt::gp
return reinterpret_cast<block*>(data);
}
static void free_chain(block* current)
static void free_chain(block* current) noexcept
{
while (current != nullptr)
{
@ -617,12 +624,12 @@ namespace blt::gp
}
}
static void free_block(block* ptr)
// Hands a single block back to the C allocator via std::free.
static void free_block(block* ptr) noexcept
{
    std::free(static_cast<void*>(ptr));
}
inline bool move_back()
inline bool move_back() noexcept
{
auto old = head;
head = head->metadata.prev;
@ -633,6 +640,36 @@ namespace blt::gp
}
return true;
}
/**
 * Locates the position `bytes` bytes below the top of `stack`.
 *
 * Starting at stack.head the blocks are walked backwards through metadata.prev, subtracting each
 * block's used bytes until the block containing the start of the range is found.
 *
 * @param stack stack to search
 * @param bytes number of bytes, counted back from the top of the stack
 * @return the block holding the first byte, how many bytes of that block belong to the range, and
 *         the address of that first byte. When `bytes` is zero the result is {stack.head, 0, nullptr}.
 *         Running out of blocks is reported through BLT_ABORT.
 */
[[nodiscard]] inline static copy_start_point get_start_from_bytes(const stack_allocator& stack, blt::size_t bytes)
{
    copy_start_point found{stack.head, static_cast<blt::ptrdiff_t>(bytes), nullptr};
    while (found.bytes_left > 0)
    {
        if (found.start_block == nullptr)
        {
            BLT_WARN("This stack doesn't contain enough space to copy %ld bytes!", bytes);
            BLT_WARN_STREAM << "State: " << stack.size() << "\n";
            BLT_ABORT("Stack doesn't contain enough data for this copy operation!");
        }
        const auto used = found.start_block->used_bytes_in_block();
        if (used < found.bytes_left)
        {
            // the whole block belongs to the range; keep looking further back
            found.bytes_left -= used;
            found.start_block = found.start_block->metadata.prev;
            continue;
        }
        if (used == found.bytes_left)
            found.start_point = found.start_block->buffer;
        else
            found.start_point = found.start_block->metadata.offset - found.bytes_left;
        break;
    }
    return found;
}
private:
block* head = nullptr;

View File

@ -19,13 +19,39 @@
#include <blt/gp/program.h>
#include <blt/std/ranges.h>
#include <blt/std/utility.h>
#include <algorithm>
#include <blt/std/memory.h>
#include <blt/profiling/profiler_v2.h>
#include <random>
namespace blt::gp
{
/**
 * Sums stack_allocator::aligned_size(type_size) over every operator in [begin, end) flagged is_value.
 *
 * @param begin first operator to inspect
 * @param end   one past the last operator to inspect
 * @return total aligned byte count of the value operators in the range
 */
inline blt::size_t accumulate_type_sizes(detail::op_iter begin, detail::op_iter end)
{
    blt::size_t byte_count = 0;
    while (begin != end)
    {
        const auto& op = *begin;
        if (op.is_value)
            byte_count += stack_allocator::aligned_size(op.type_size);
        ++begin;
    }
    return byte_count;
}
/**
 * Returns the data pointer of a function-local thread_local scratch buffer, resizing the buffer
 * first whenever its current size() is smaller than `bytes`.
 *
 * The unnamed template parameter is only a tag: every distinct tag type instantiates its own buffer.
 * NOTE(review): a later call with the same tag may resize the buffer, so a previously returned
 * pointer presumably should not be kept across calls — confirm against blt::expanding_buffer.
 */
template<typename>
blt::u8* get_thread_pointer_for_size(blt::size_t bytes)
{
    static thread_local blt::expanding_buffer<blt::u8> scratch;
    const bool too_small = scratch.size() < bytes;
    if (too_small)
        scratch.resize(bytes);
    return scratch.data();
}
// Namespace-scope generator instance used as the default by mutation_t::config_t.
grow_generator_t grow_generator;
// Default constructor: initialises the `generator` member from the namespace-scope grow_generator above.
mutation_t::config_t::config_t(): generator(grow_generator)
{}
blt::expected<crossover_t::result_t, crossover_t::error_t> crossover_t::apply(gp_program& program, const tree_t& p1, const tree_t& p2) // NOLINT
{
result_t result{p1, p2};
@ -50,8 +76,8 @@ namespace blt::gp
auto found_point_begin_itr = c2_ops.begin() + point->p2_crossover_point;
auto found_point_end_itr = c2_ops.begin() + find_endpoint(program, c2_ops, point->p2_crossover_point);
stack_allocator& c1_stack_init = c1.get_values();
stack_allocator& c2_stack_init = c2.get_values();
stack_allocator& c1_stack = c1.get_values();
stack_allocator& c2_stack = c2.get_values();
// we have to make a copy because we will modify the underlying storage.
std::vector<op_container_t> c1_operators;
@ -62,25 +88,27 @@ namespace blt::gp
for (const auto& op : blt::iterate(found_point_begin_itr, found_point_end_itr))
c2_operators.push_back(op);
stack_allocator c1_stack_after_copy;
stack_allocator c1_stack_for_copy;
stack_allocator c2_stack_after_copy;
stack_allocator c2_stack_for_copy;
blt::size_t c1_stack_after_bytes = accumulate_type_sizes(crossover_point_end_itr, c1_ops.end());
blt::size_t c1_stack_for_bytes = accumulate_type_sizes(crossover_point_begin_itr, crossover_point_end_itr);
blt::size_t c2_stack_after_bytes = accumulate_type_sizes(found_point_end_itr, c2_ops.end());
blt::size_t c2_stack_for_bytes = accumulate_type_sizes(found_point_begin_itr, found_point_end_itr);
auto c1_total = static_cast<blt::ptrdiff_t>(c1_stack_after_bytes + c1_stack_for_bytes);
auto c2_total = static_cast<blt::ptrdiff_t>(c2_stack_after_bytes + c2_stack_for_bytes);
auto copy_ptr_c1 = get_thread_pointer_for_size<struct c1>(c1_total);
auto copy_ptr_c2 = get_thread_pointer_for_size<struct c2>(c2_total);
// transfer all values after the crossover point. these will need to be transferred back to child2
transfer_backward(c1_stack_init, c1_stack_after_copy, c1_ops.end() - 1, crossover_point_end_itr - 1);
// transfer all values for the crossover point.
transfer_backward(c1_stack_init, c1_stack_for_copy, crossover_point_end_itr - 1, crossover_point_begin_itr - 1);
// transfer child2 values for copying back into c1
transfer_backward(c2_stack_init, c2_stack_after_copy, c2_ops.end() - 1, found_point_end_itr - 1);
transfer_backward(c2_stack_init, c2_stack_for_copy, found_point_end_itr - 1, found_point_begin_itr - 1);
// now copy back into the respective children
transfer_forward(c2_stack_for_copy, c1.get_values(), found_point_begin_itr, found_point_end_itr);
transfer_forward(c1_stack_for_copy, c2.get_values(), crossover_point_begin_itr, crossover_point_end_itr);
// now copy after the crossover point back to the correct children
transfer_forward(c1_stack_after_copy, c1.get_values(), crossover_point_end_itr, c1_ops.end());
transfer_forward(c2_stack_after_copy, c2.get_values(), found_point_end_itr, c2_ops.end());
c1_stack.copy_to(copy_ptr_c1, c1_total);
c1_stack.pop_bytes(c1_total);
c2_stack.copy_to(copy_ptr_c2, c2_total);
c2_stack.pop_bytes(c2_total);
c2_stack.copy_from(copy_ptr_c1, c1_stack_for_bytes);
c2_stack.copy_from(copy_ptr_c2 + c2_stack_for_bytes, c2_stack_after_bytes);
c1_stack.copy_from(copy_ptr_c2, c2_stack_for_bytes);
c1_stack.copy_from(copy_ptr_c1 + c1_stack_for_bytes, c1_stack_after_bytes);
// now swap the operators
auto insert_point_c1 = crossover_point_begin_itr - 1;
auto insert_point_c2 = found_point_begin_itr - 1;
@ -195,26 +223,18 @@ namespace blt::gp
auto& new_ops_r = new_tree.get_operations();
auto& new_vals_r = new_tree.get_values();
stack_allocator stack_after;
blt::size_t total_bytes_after = 0;
for (auto it = end_itr; it != ops_r.end(); it++)
{
if (it->is_value)
total_bytes_after += stack_allocator::aligned_size(it->type_size);
}
// transfer_backward(vals_r, stack_after, ops_r.end() - 1, end_itr - 1);
stack_after.copy_from(vals_r, total_bytes_after);
vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(total_bytes_after));
for (auto it = end_itr - 1; it != begin_itr - 1; it--)
{
if (it->is_value)
vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(stack_allocator::aligned_size(it->type_size)));
}
blt::size_t total_bytes_after = accumulate_type_sizes(end_itr, ops_r.end());
auto* stack_after_data = get_thread_pointer_for_size<struct mutation>(total_bytes_after);
// make a copy of any stack data after the mutation point / children.
vals_r.copy_to(stack_after_data, total_bytes_after);
// remove the bytes of the data after the mutation point and the data for the children of the mutation node.
vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(total_bytes_after + accumulate_type_sizes(begin_itr, end_itr)));
// insert the new tree then move back the data from after the original mutation point.
vals_r.insert(std::move(new_vals_r));
//transfer_forward(stack_after, vals_r, end_itr, ops_r.end());
vals_r.copy_from(stack_after, total_bytes_after);
stack_after = {};
vals_r.copy_from(stack_after_data, total_bytes_after);
auto before = begin_itr - 1;
ops_r.erase(begin_itr, end_itr);
@ -223,7 +243,7 @@ namespace blt::gp
// this will check to make sure that the tree is in a correct and executable state. it requires that the evaluation is context free!
#if BLT_DEBUG_LEVEL >= 2
BLT_ASSERT(new_vals_r.empty());
BLT_ASSERT(stack_after.empty());
//BLT_ASSERT(stack_after.empty());
blt::size_t bytes_expected = 0;
auto bytes_size = vals_r.size().total_used_bytes;
@ -267,9 +287,6 @@ namespace blt::gp
return c;
}
mutation_t::config_t::config_t(): generator(grow_generator)
{}
blt::ptrdiff_t find_endpoint(blt::gp::gp_program& program, const std::vector<blt::gp::op_container_t>& container, blt::ptrdiff_t index)
{
blt::i64 children_left = 0;