Compare commits

..

2 Commits

Author SHA1 Message Date
Brett 1b83d6b4a8 need to add threading next! 2024-08-17 02:20:32 -04:00
Brett 3dd3e6fc9e stack works. test now 2024-08-17 01:59:13 -04:00
5 changed files with 87 additions and 683 deletions

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25) cmake_minimum_required(VERSION 3.25)
project(blt-gp VERSION 0.1.1) project(blt-gp VERSION 0.1.3)
include(CTest) include(CTest)

41
dev_branch.txt Normal file
View File

@ -0,0 +1,41 @@
Performance counter stats for './cmake-build-release/blt-symbolic-regression-example' (30 runs):
24,277,728,279 branches ( +- 19.01% ) (20.47%)
76,457,616 branch-misses # 0.31% of all branches ( +- 17.97% ) (21.41%)
14,213,192 cache-misses # 4.73% of all cache refs ( +- 14.24% ) (22.52%)
300,581,049 cache-references ( +- 21.08% ) (23.68%)
48,914,779,668 cycles ( +- 19.65% ) (24.80%)
123,068,193,359 instructions # 2.52 insn per cycle ( +- 19.44% ) (25.09%)
0 alignment-faults
4,202 cgroup-switches ( +- 13.56% )
115,962 faults ( +- 10.95% )
871,101,993 ns duration_time ( +- 13.40% )
11,507,605,674 ns user_time ( +- 3.56% )
299,016,204 ns system_time ( +- 3.32% )
41,446,831,795 L1-dcache-loads ( +- 19.28% ) (24.69%)
167,603,194 L1-dcache-load-misses # 0.40% of all L1-dcache accesses ( +- 22.47% ) (23.95%)
81,992,073 L1-dcache-prefetches ( +- 25.34% ) (23.24%)
350,398,072 L1-icache-loads ( +- 15.30% ) (22.70%)
909,504 L1-icache-load-misses # 0.26% of all L1-icache accesses ( +- 14.46% ) (22.18%)
14,271,381 dTLB-loads ( +- 20.04% ) (21.90%)
1,559,972 dTLB-load-misses # 10.93% of all dTLB cache accesses ( +- 14.74% ) (21.39%)
246,888 iTLB-loads ( +- 21.69% ) (20.54%)
403,152 iTLB-load-misses # 163.29% of all iTLB cache accesses ( +- 13.35% ) (19.94%)
210,585,840 l2_request_g1.all_no_prefetch ( +- 20.07% ) (19.93%)
115,962 page-faults ( +- 10.95% )
115,958 page-faults:u ( +- 10.95% )
3 page-faults:k ( +- 4.54% )
41,209,739,257 L1-dcache-loads ( +- 19.02% ) (19.60%)
181,755,898 L1-dcache-load-misses # 0.44% of all L1-dcache accesses ( +- 20.60% ) (20.01%)
<not supported> LLC-loads
<not supported> LLC-load-misses
425,056,352 L1-icache-loads ( +- 12.27% ) (20.43%)
1,076,486 L1-icache-load-misses # 0.31% of all L1-icache accesses ( +- 10.84% ) (20.98%)
15,418,419 dTLB-loads ( +- 17.74% ) (21.24%)
1,648,473 dTLB-load-misses # 11.55% of all dTLB cache accesses ( +- 13.11% ) (20.94%)
325,141 iTLB-loads ( +- 26.87% ) (20.80%)
459,828 iTLB-load-misses # 186.25% of all iTLB cache accesses ( +- 11.50% ) (20.34%)
94,270,593 L1-dcache-prefetches ( +- 22.82% ) (20.09%)
<not supported> L1-dcache-prefetch-misses
0.871 +- 0.117 seconds time elapsed ( +- 13.40% )

View File

@ -39,7 +39,7 @@ blt::gp::prog_config_t config = blt::gp::prog_config_t()
.set_mutation_chance(0.1) .set_mutation_chance(0.1)
.set_reproduction_chance(0) .set_reproduction_chance(0)
.set_max_generations(50) .set_max_generations(50)
.set_pop_size(5000) .set_pop_size(50000)
.set_thread_count(0); .set_thread_count(0);
blt::gp::type_provider type_system; blt::gp::type_provider type_system;

View File

@ -140,7 +140,6 @@ namespace blt::gp
void copy_from(const stack_allocator& stack, blt::size_t bytes) void copy_from(const stack_allocator& stack, blt::size_t bytes)
{ {
BLT_ASSERT(stack.data_ != nullptr);
if (size_ < bytes + bytes_stored) if (size_ < bytes + bytes_stored)
expand(bytes + bytes_stored); expand(bytes + bytes_stored);
std::memcpy(data_ + bytes_stored, stack.data_ + (stack.bytes_stored - bytes), bytes); std::memcpy(data_ + bytes_stored, stack.data_ + (stack.bytes_stored - bytes), bytes);
@ -175,9 +174,8 @@ namespace blt::gp
static_assert(std::is_trivially_copyable_v<NO_REF> && "Type must be bitwise copyable!"); static_assert(std::is_trivially_copyable_v<NO_REF> && "Type must be bitwise copyable!");
static_assert(alignof(NO_REF) <= MAX_ALIGNMENT && "Type alignment must not be greater than the max alignment!"); static_assert(alignof(NO_REF) <= MAX_ALIGNMENT && "Type alignment must not be greater than the max alignment!");
constexpr auto size = aligned_size(sizeof(NO_REF)); constexpr auto size = aligned_size(sizeof(NO_REF));
T t;
std::memcpy(&t, data_ + bytes_stored - size, size);
bytes_stored -= size; bytes_stored -= size;
return *reinterpret_cast<T*>(data_ + bytes_stored);
} }
template<typename T, typename NO_REF = NO_REF_T<T>> template<typename T, typename NO_REF = NO_REF_T<T>>
@ -196,7 +194,7 @@ namespace blt::gp
void transfer_bytes(stack_allocator& to, blt::size_t bytes) void transfer_bytes(stack_allocator& to, blt::size_t bytes)
{ {
to.copy_from(*this, bytes); to.copy_from(*this, aligned_size(bytes));
pop_bytes(bytes); pop_bytes(bytes);
} }
@ -283,8 +281,7 @@ namespace blt::gp
} }
if (aligned_ptr == nullptr) if (aligned_ptr == nullptr)
throw std::bad_alloc(); throw std::bad_alloc();
// TODO: this whole process could be better auto used_bytes = aligned_size(bytes);
auto used_bytes = static_cast<blt::size_t>(std::abs(data_ - static_cast<blt::u8*>(aligned_ptr)));
bytes_stored += used_bytes; bytes_stored += used_bytes;
return aligned_ptr; return aligned_ptr;
} }
@ -309,681 +306,6 @@ namespace blt::gp
blt::size_t bytes_stored = 0; blt::size_t bytes_stored = 0;
blt::size_t size_ = 0; blt::size_t size_ = 0;
}; };
class stack_allocator_old
{
constexpr static blt::size_t PAGE_SIZE = 0x1000;
constexpr static blt::size_t MAX_ALIGNMENT = 8;
template<typename T>
using NO_REF_T = std::remove_cv_t<std::remove_reference_t<T>>;
public:
/**
 * Byte-count summary of a stack_allocator_old, produced by size().
 *
 * The first four totals cover the blocks reachable from head through prev links,
 * the "dealloc" totals cover the blocks reachable from head through next links,
 * and `blocks` counts both groups.
 */
struct size_data_t
{
    blt::size_t total_size_bytes = 0;
    blt::size_t total_used_bytes = 0;
    blt::size_t total_remaining_bytes = 0;
    blt::size_t total_no_meta_bytes = 0;
    blt::size_t total_dealloc = 0;
    blt::size_t total_dealloc_used = 0;
    blt::size_t total_dealloc_remaining = 0;
    blt::size_t total_dealloc_no_meta = 0;
    blt::size_t blocks = 0;

    /**
     * Writes a one-line, human readable summary of the totals to `stream`.
     * Percentages for the "dealloc" group are only printed when their denominator is non-zero.
     * The first two percentages are always printed, so an all-zero summary prints a NaN there.
     * @return the stream that was passed in
     */
    friend std::ostream& operator<<(std::ostream& stream, const size_data_t& data)
    {
        const auto used = static_cast<double>(data.total_used_bytes);
        const auto dealloc_used = static_cast<double>(data.total_dealloc_used);

        stream << "[";
        stream << data.total_used_bytes << "/";
        stream << data.total_size_bytes << "(";
        stream << (used / static_cast<double>(data.total_size_bytes) * 100) << "%), ";
        stream << data.total_used_bytes << "/";
        stream << data.total_no_meta_bytes << "(";
        stream << (used / static_cast<double>(data.total_no_meta_bytes) * 100)
               << "%), (empty space: ";
        stream << data.total_remaining_bytes << ") blocks: " << data.blocks << " || unallocated space: ";
        stream << data.total_dealloc_used << "/";
        stream << data.total_dealloc;
        if (data.total_dealloc > 0)
            stream << "(" << (dealloc_used / static_cast<double>(data.total_dealloc) * 100) << "%)";
        stream << ", ";
        stream << data.total_dealloc_used << "/";
        stream << data.total_dealloc_no_meta;
        // the ratio is scaled by 100 after the division, matching the other percentages
        if (data.total_dealloc_no_meta > 0)
            stream << "(" << (dealloc_used / static_cast<double>(data.total_dealloc_no_meta) * 100)
                   << "%)";
        stream << ", (empty space: " << data.total_dealloc_remaining << ")]";
        return stream;
    }
};
/**
 * Merges the blocks owned by `stack` into this allocator.
 *
 * `stack` is taken by value and its head is cleared before anything else happens, so the
 * parameter's destructor frees nothing and this allocator becomes responsible for the blocks.
 * Source blocks whose used bytes fit into the free space of a visited block are copied there
 * and freed; whatever is left is linked onto the end of this allocator's block chain.
 *
 * @param stack allocator whose blocks are absorbed
 */
void insert(stack_allocator_old stack)
{
if (stack.empty())
return;
// take a copy of the pointer to this stack's blocks
auto old_head = stack.head;
// stack is now empty, we have the last reference to it.
stack.head = nullptr;
// we don't have any nodes to search through or re-point, we can just assign the head
if (head == nullptr)
{
head = old_head;
return;
}
// find the beginning of the stack
auto begin = old_head;
while (begin->metadata.prev != nullptr)
begin = begin->metadata.prev;
// move along blocks with free space, attempt to insert bytes from one stack to another
// NOTE(review): the walk starts at head and only visits blocks that have a successor, so the
// last block in this chain is never used as a copy target -- confirm this is intended.
auto insert = head;
while (insert->metadata.next != nullptr && begin != nullptr)
{
if (begin->used_bytes_in_block() <= insert->remaining_bytes_in_block())
{
std::memcpy(insert->metadata.offset, begin->buffer, begin->used_bytes_in_block());
insert->metadata.offset += begin->used_bytes_in_block();
auto old_begin = begin;
begin = begin->metadata.next;
// NOTE(review): the new `begin` still has its prev pointing at the block freed here;
// prev is only rewritten further down when the remainder is linked in.
free_block(old_begin);
}
// head follows the block that was just examined, whether or not anything was copied into it
head = insert;
insert = insert->metadata.next;
}
// every source block was copied and freed, nothing is left to link
if (begin == nullptr)
return;
while (insert->metadata.next != nullptr)
insert = insert->metadata.next;
// if there are source blocks left we can move the pointers around instead of copying
insert->metadata.next = begin;
begin->metadata.prev = insert;
// find where the head is now and set the head to this new point.
auto new_head = begin;
while (new_head->metadata.next != nullptr)
new_head = new_head->metadata.next;
head = new_head;
}
/**
* Bytes must be the number of bytes to move, all types must have alignment accounted for
*/
void copy_from(const stack_allocator_old& stack, blt::size_t bytes)
{
if (bytes == 0)
return;
if (stack.empty())
{
BLT_WARN("This stack is empty, we will copy no bytes from it!");
return;
}
auto [start_block, bytes_left, start_point] = get_start_from_bytes(stack, bytes);
if (bytes_left > 0)
{
allocate_block_to_head_for_size(bytes_left);
std::memcpy(head->metadata.offset, start_point, bytes_left);
head->metadata.offset += bytes_left;
start_block = start_block->metadata.next;
}
// we now copy whole blocks at a time.
while (start_block != nullptr)
{
allocate_block_to_head_for_size(start_block->used_bytes_in_block());
std::memcpy(head->metadata.offset, start_block->buffer, start_block->used_bytes_in_block());
head->metadata.offset += start_block->used_bytes_in_block();
start_block = start_block->metadata.next;
}
}
/**
 * Appends `bytes` raw bytes read from `data` to this allocator.
 * A null `data` pointer or a zero byte count is ignored.
 */
void copy_from(blt::u8* data, blt::size_t bytes)
{
    const bool nothing_to_copy = (data == nullptr) || (bytes == 0);
    if (nothing_to_copy)
        return;

    allocate_block_to_head_for_size(bytes);
    auto* const destination = head->metadata.offset;
    std::memcpy(destination, data, bytes);
    head->metadata.offset = destination + bytes;
}
/**
 * Copies the top `bytes` bytes of this allocator into the buffer at `data`.
 * A null `data` pointer or a zero byte count is ignored; the allocator is not modified.
 */
void copy_to(blt::u8* data, blt::size_t bytes) const
{
    if (data == nullptr || bytes == 0)
        return;

    auto [start_block, bytes_left, start_point] = get_start_from_bytes(*this, bytes);
    const block* cursor = start_block;
    blt::u8* out = data;

    // the first block may only contribute its tail
    if (bytes_left > 0)
    {
        std::memcpy(out, start_point, bytes_left);
        out += bytes_left;
        cursor = cursor->metadata.next;
    }

    // every following block is copied in full
    for (; cursor != nullptr; cursor = cursor->metadata.next)
    {
        const auto used = cursor->used_bytes_in_block();
        std::memcpy(out, cursor->buffer, used);
        out += used;
    }
}
/**
 * Pushes a bitwise copy of an object on to the stack.
 * @tparam T type to push, must be trivially copyable and aligned to at most MAX_ALIGNMENT
 * @param value object whose bytes are copied on to the stack
 */
template<typename T>
void push(const T& value)
{
    using value_t = std::remove_cv_t<std::remove_reference_t<T>>;
    static_assert(std::is_trivially_copyable_v<value_t> && "Type must be bitwise copyable!");
    static_assert(alignof(value_t) <= MAX_ALIGNMENT && "Type must not be greater than the max alignment!");

    auto* const slot = static_cast<blt::u8*>(allocate_bytes<value_t>());
    // the value occupies a full aligned slot, so the write offset moves past the padding too
    head->metadata.offset = slot + aligned_size<value_t>();
    std::memcpy(slot, &value, sizeof(value_t));
}
/**
 * Removes the top value from the stack and returns a copy of it.
 *
 * @tparam T type to pop, must be trivially copyable
 * @return copy of the value that was on top of the stack
 * @throws std::runtime_error if the stack is empty, or if the current block holds fewer bytes
 *         than the aligned size of T
 */
template<typename T>
T pop()
{
    using NO_REF_T = std::remove_cv_t<std::remove_reference_t<T>>;
    static_assert(std::is_trivially_copyable_v<NO_REF_T> && "Type must be bitwise copyable!");
    constexpr static auto TYPE_SIZE = aligned_size<NO_REF_T>();
    // skip over drained blocks; head is null on an allocator that never allocated,
    // so it has to be checked before it is dereferenced
    while (head != nullptr && head->used_bytes_in_block() == 0 && move_back());
    if (empty())
        throw std::runtime_error("Silly boi the stack is empty!");
    if (head->used_bytes_in_block() < static_cast<blt::ptrdiff_t>(aligned_size<NO_REF_T>()))
        throw std::runtime_error((std::string("Mismatched Types! Not enough space left in block! Bytes: ") += std::to_string(
                head->used_bytes_in_block()) += " Size: " + std::to_string(sizeof(NO_REF_T))).c_str());
    // make copy
    NO_REF_T t = *reinterpret_cast<NO_REF_T*>(head->metadata.offset - TYPE_SIZE);
    // call destructor
    if constexpr (detail::has_func_drop_v<T>)
        call_drop<NO_REF_T>(0, 0, nullptr);
    // move offset back
    head->metadata.offset -= TYPE_SIZE;
    // moving back allows us to allocate with other data, if there is room.
    while (head->used_bytes_in_block() == 0 && move_back());
    return t;
}
/**
 * Returns a reference to a value stored `bytes` bytes below the top of the stack.
 *
 * The value is located by walking from head through prev links until a block holds
 * at least `bytes` plus the aligned size of T (minus what later blocks already covered).
 *
 * @tparam T type of the stored value
 * @param bytes number of bytes that sit between the top of the stack and the end of the value
 * @return reference into the block's buffer, no copy is made
 * @throws std::runtime_error if the walk runs off the first block
 */
template<typename T>
T& from(blt::size_t bytes)
{
using NO_REF_T = std::remove_cv_t<std::remove_reference_t<T>>;
constexpr static auto TYPE_SIZE = aligned_size<NO_REF_T>();
// distance from the top of the stack to the *start* of the requested value
auto remaining_bytes = static_cast<blt::ptrdiff_t>(bytes + TYPE_SIZE);
block* blk = head;
while (remaining_bytes > 0)
{
if (blk == nullptr)
{
BLT_WARN_STREAM << "Stack state: " << size() << "\n";
BLT_WARN_STREAM << "Requested " << bytes << " bytes which becomes " << (bytes + TYPE_SIZE) << "\n";
throw std::runtime_error("Requested size is beyond the scope of this stack!");
}
auto bytes_available = blk->used_bytes_in_block() - remaining_bytes;
if (bytes_available < 0)
{
// this block is skipped entirely, the rest of the distance is measured in the previous block
remaining_bytes -= blk->used_bytes_in_block();
blk = blk->metadata.prev;
} else
break;
}
// reached when the loop stepped past the first block on its final iteration
if (blk == nullptr)
throw std::runtime_error("Some nonsense is going on. This function already smells");
if (blk->used_bytes_in_block() < static_cast<blt::ptrdiff_t>(TYPE_SIZE))
{
BLT_WARN_STREAM << size() << "\n";
BLT_WARN_STREAM << "Requested " << bytes << " bytes which becomes " << (bytes + TYPE_SIZE) << "\n";
BLT_WARN_STREAM << "Block size: " << blk->storage_size() << "\n";
BLT_ABORT((std::string("Mismatched Types! Not enough space left in block! Bytes: ") += std::to_string(
blk->used_bytes_in_block()) += " Size: " + std::to_string(sizeof(NO_REF_T))).c_str());
}
// remaining_bytes is now relative to this block's write offset
return *reinterpret_cast<NO_REF_T*>(blk->metadata.offset - remaining_bytes);
}
/**
 * Discards the top `bytes` bytes of the stack, crossing block boundaries where needed.
 *
 * Aborts through BLT_ABORT when the stack is empty or holds fewer than `bytes` bytes.
 * A count of zero is a no-op.
 *
 * @param bytes number of bytes to discard
 */
void pop_bytes(blt::ptrdiff_t bytes)
{
    if (bytes == 0)
        return;
    if (empty())
    {
        BLT_WARN("Cannot pop %ld bytes", bytes);
        BLT_ABORT("Stack is empty, we cannot pop!");
    }
    while (bytes > 0)
    {
        if (head == nullptr)
        {
            BLT_WARN("The head is null, this stack doesn't contain enough data inside to pop %ld bytes!", bytes);
            BLT_WARN_STREAM << "Stack State: " << size() << "\n";
            BLT_ABORT("Stack doesn't contain enough data to preform a pop!");
        }
        const auto used = head->used_bytes_in_block();
        if (used >= bytes)
        {
            // the rest of the request fits inside this block, just move the offset pointer
            head->metadata.offset -= bytes;
            break;
        }
        // this block is consumed completely; account for it and reset its buffer
        bytes -= used;
        head->metadata.offset = head->buffer;
        // move_back() leaves head untouched on the first block, so without this check the loop
        // would keep subtracting zero bytes from the same drained block forever
        if (!move_back())
        {
            BLT_WARN("There is no previous block, this stack doesn't contain enough data inside to pop the remaining %ld bytes!", bytes);
            BLT_WARN_STREAM << "Stack State: " << size() << "\n";
            BLT_ABORT("Stack doesn't contain enough data to preform a pop!");
        }
    }
    while (head != nullptr && head->used_bytes_in_block() == 0 && move_back());
}
/**
 * Moves the value on top of this stack on to the stack `to`.
 *
 * Warning this function should be used to transfer types, not arrays of types! It will produce an error if you attempt to pass more
 * than one type # of bytes at a time!
 * @param to stack to push to
 * @param bytes size of the value in bytes; it is rounded up to the aligned size before the transfer
 * @throws std::runtime_error if this stack is empty
 */
void transfer_bytes(stack_allocator_old& to, blt::size_t bytes)
{
    // head is null on an allocator that never allocated, so check it before dereferencing
    while (head != nullptr && head->used_bytes_in_block() == 0 && move_back());
    if (empty())
        throw std::runtime_error("This stack is empty!");
    auto type_size = aligned_size(bytes);
    if (head->used_bytes_in_block() < static_cast<blt::ptrdiff_t>(type_size))
    {
        BLT_ERROR_STREAM << "Stack State:\n" << size() << "\n" << "Bytes in head: " << bytes_in_head() << "\n";
        BLT_ABORT(("This stack doesn't contain enough data for this type! " + std::to_string(head->used_bytes_in_block()) + " / " +
                   std::to_string(bytes) + " This is an invalid runtime state!").c_str());
    }
    auto ptr = to.allocate_bytes(type_size);
    to.head->metadata.offset = static_cast<blt::u8*>(ptr) + type_size;
    std::memcpy(ptr, head->metadata.offset - type_size, type_size);
    head->metadata.offset -= type_size;
    // step back over the block if the transfer drained it
    while (head->used_bytes_in_block() == 0 && move_back());
}
/**
 * Calls drop() on the values of types `Args...` stored at the top of the stack, for each type
 * that has a drop function, then removes one mask entry per argument.
 *
 * @tparam Args types of the stored values, visited in pack order
 * @param mask optional bitmask; when non-null its last sizeof...(Args) entries select which
 *             values are dropped (an unset bit skips the value) and are popped afterwards
 */
template<typename... Args>
void call_destructors(detail::bitmask_t* mask)
{
if constexpr (sizeof...(Args) > 0)
{
// bytes stored above the first argument: the aligned size of every argument except the first
blt::size_t offset = (stack_allocator_old::aligned_size<NO_REF_T<Args>>() + ...) -
stack_allocator_old::aligned_size<NO_REF_T<typename blt::meta::arg_helper<Args...>::First>>();
blt::size_t index = 0;
if (mask != nullptr)
index = mask->size() - sizeof...(Args);
// NOTE(review): after each call the offset is reduced by the size of the argument that was just
// handled, not of the next one; this looks wrong when argument sizes differ -- confirm.
((call_drop<Args>(offset, index, mask), offset -= stack_allocator_old::aligned_size<NO_REF_T<Args>>(), ++index), ...);
if (mask != nullptr)
{
auto& mask_r = *mask;
for (blt::size_t i = 0; i < sizeof...(Args); i++)
mask_r.pop_back();
}
}
}
/**
 * @return true when there is no block at all, or when head is the first block and holds no bytes
 */
[[nodiscard]] bool empty() const noexcept
{
    const bool head_is_first_block = head != nullptr && head->metadata.prev == nullptr;
    return head == nullptr || (head_is_first_block && head->used_bytes_in_block() == 0);
}
/**
 * @return number of bytes used in the head block, or 0 when there is no head block
 */
[[nodiscard]] blt::ptrdiff_t bytes_in_head() const noexcept
{
    return head != nullptr ? head->used_bytes_in_block() : 0;
}
/**
 * Warning this function is slow! It walks every block of the allocator.
 * @return byte totals for the blocks from head backwards, and separately for the blocks after head
 */
[[nodiscard]] size_data_t size() const noexcept
{
    size_data_t totals;

    // head and everything before it
    for (const block* live = head; live != nullptr; live = live->metadata.prev)
    {
        totals.total_size_bytes += live->metadata.size;
        totals.total_no_meta_bytes += live->storage_size();
        totals.total_remaining_bytes += live->remaining_bytes_in_block();
        totals.total_used_bytes += live->used_bytes_in_block();
        totals.blocks++;
    }

    if (head == nullptr)
        return totals;

    // blocks after head are reported as the "dealloc" group
    for (const block* spare = head->metadata.next; spare != nullptr; spare = spare->metadata.next)
    {
        totals.total_dealloc += spare->metadata.size;
        totals.total_dealloc_no_meta += spare->storage_size();
        totals.total_dealloc_remaining += spare->remaining_bytes_in_block();
        totals.total_dealloc_used += spare->used_bytes_in_block();
        totals.blocks++;
    }
    return totals;
}
stack_allocator_old() = default;
// TODO: cleanup this allocator!
// if you keep track of type size information you can memcpy between stack allocators as you already only allow trivially copyable types
stack_allocator_old(const stack_allocator_old& copy) noexcept
{
if (copy.empty())
return;
head = nullptr;
block* list_itr = nullptr;
// start at the beginning of the list
block* current = copy.head;
while (current != nullptr)
{
list_itr = current;
current = current->metadata.prev;
}
// copy all the blocks
while (list_itr != nullptr)
{
push_block(list_itr->metadata.size);
std::memcpy(head->buffer, list_itr->buffer, list_itr->storage_size());
head->metadata.size = list_itr->metadata.size;
head->metadata.offset = head->buffer + list_itr->used_bytes_in_block();
list_itr = list_itr->metadata.next;
}
}
stack_allocator_old& operator=(const stack_allocator_old& copy) = delete;
/** Takes over the block chain of `move` and leaves it without blocks. */
stack_allocator_old(stack_allocator_old&& move) noexcept
{
    head = std::exchange(move.head, nullptr);
}
/**
 * Move assignment implemented as a swap of the head pointers: the blocks previously held by
 * this allocator end up in `move`.
 */
stack_allocator_old& operator=(stack_allocator_old&& move) noexcept
{
    block* const previous = head;
    head = move.head;
    move.head = previous;
    return *this;
}
/** Frees every block: first the ones after head, then head and everything before it. */
~stack_allocator_old() noexcept
{
    block* spare = (head != nullptr) ? head->metadata.next : nullptr;
    while (spare != nullptr)
    {
        // read the successor before the block is released
        block* const following = spare->metadata.next;
        free_block(spare);
        spare = following;
    }
    free_chain(head);
}
/** @return sizeof(T), with cv and reference qualifiers stripped, rounded up to a multiple of MAX_ALIGNMENT */
template<typename T>
static inline constexpr blt::size_t aligned_size() noexcept
{
    using value_t = NO_REF_T<T>;
    return aligned_size(sizeof(value_t));
}
/** @return `size` rounded up to the next multiple of MAX_ALIGNMENT */
static inline constexpr blt::size_t aligned_size(blt::size_t size) noexcept
{
    constexpr blt::size_t low_bits = MAX_ALIGNMENT - 1;
    return (size + low_bits) & ~low_bits;
}
/** @return size in bytes of the per-block metadata header */
inline static constexpr auto metadata_size() noexcept
{
return sizeof(typename block::block_metadata_t);
}
/** @return sizeof(block): the metadata header plus the declared 8 byte buffer member */
inline static constexpr auto block_size() noexcept
{
return sizeof(block);
}
/** @return the PAGE_SIZE constant of this allocator */
inline static constexpr auto page_size() noexcept
{
return PAGE_SIZE;
}
/** @return PAGE_SIZE minus the size of the metadata header */
inline static constexpr auto page_size_no_meta() noexcept
{
return page_size() - metadata_size();
}
/** @return PAGE_SIZE minus sizeof(block) */
inline static constexpr auto page_size_no_block() noexcept
{
return page_size() - block_size();
}
private:
/**
 * One node of the doubly linked block chain: a metadata header followed by the storage buffer.
 * Blocks are created by placement-new inside a larger allocation (see allocate_block), so the
 * usable storage extends past the declared 8 bytes of `buffer`.
 */
struct block
{
struct block_metadata_t
{
// size of the whole allocation in bytes, header included
blt::size_t size = 0;
block* next = nullptr;
block* prev = nullptr;
// write position inside buffer; bytes between buffer and offset are in use
blt::u8* offset = nullptr;
} metadata;
blt::u8 buffer[8]{};
/** @param size size of the whole allocation this block is constructed in */
explicit block(blt::size_t size) noexcept
{
#if BLT_DEBUG_LEVEL > 0
if (size < PAGE_SIZE)
{
BLT_WARN("Hey this block is too small, who allocated it?");
std::abort();
}
#endif
metadata.size = size;
metadata.offset = buffer;
}
/** Marks the block as holding no bytes by moving the write position back to the buffer start. */
void reset() noexcept
{
metadata.offset = buffer;
}
/** @return allocation size minus the metadata header */
[[nodiscard]] blt::ptrdiff_t storage_size() const noexcept
{
return static_cast<blt::ptrdiff_t>(metadata.size - sizeof(typename block::block_metadata_t));
}
/** @return number of bytes between the buffer start and the write position */
[[nodiscard]] blt::ptrdiff_t used_bytes_in_block() const noexcept
{
return static_cast<blt::ptrdiff_t>(metadata.offset - buffer);
}
/** @return storage_size() minus used_bytes_in_block() */
[[nodiscard]] blt::ptrdiff_t remaining_bytes_in_block() const noexcept
{
return storage_size() - used_bytes_in_block();
}
};
/** Result of get_start_from_bytes: where a copy of the top N bytes of a stack has to begin. */
struct copy_start_point
{
// block that contains the first byte to copy
block* start_block;
// number of bytes to copy out of start_block
blt::ptrdiff_t bytes_left;
// address of the first byte to copy inside start_block
blt::u8* start_point;
};
/**
 * Calls drop() on the value of type T stored `offset` bytes below the top of the stack.
 * Does nothing for types without a drop function, or when `mask` is non-null and the bit at
 * `index` is not set.
 */
template<typename T>
inline void call_drop(blt::size_t offset, blt::size_t index, detail::bitmask_t* mask)
{
    if constexpr (detail::has_func_drop_v<T>)
    {
        const bool masked_out = mask != nullptr && !(*mask)[index];
        if (!masked_out)
            from<NO_REF_T<T>>(offset).drop();
    }
}
template<typename T>
void* allocate_bytes()
{
return allocate_bytes(sizeof(NO_REF_T<T>));
}
/**
 * Finds an aligned location for `size` bytes in the head block, moving head to another block
 * when the current one has no room. The write offset is NOT advanced, callers do that.
 * @return aligned pointer into the head block
 * @throws std::bad_alloc if no location is available even after a block was provided
 */
void* allocate_bytes(blt::size_t size)
{
    // get_aligned_pointer only inspects head, so asking again after a hit changes nothing
    if (void* const fits_in_head = get_aligned_pointer(size); fits_in_head != nullptr)
        return fits_in_head;

    allocate_block_to_head_for_size(aligned_size(size));
    void* const fresh = get_aligned_pointer(size);
    if (fresh == nullptr)
        throw std::bad_alloc();
    return fresh;
}
/**
 * Advances head through the blocks that follow it, resetting each one, until a block with room
 * for `size` bytes is found; if none qualifies a new block is pushed.
 * The size of the block metadata is added when a new block is requested, but `size` itself is
 * used unmodified and therefore has to be aligned by the caller.
 *
 * On return the block that has room for `size` is at head.
 */
void allocate_block_to_head_for_size(const blt::size_t size) noexcept
{
    const auto needed = static_cast<blt::ptrdiff_t>(size);

    // the current head itself is not examined here, only the blocks after it
    while (head != nullptr && head->metadata.next != nullptr)
    {
        head = head->metadata.next;
        head->reset();
        if (head->remaining_bytes_in_block() >= needed)
            return;
    }

    const bool head_has_room = head != nullptr && head->remaining_bytes_in_block() >= needed;
    if (!head_has_room)
        push_block(size + sizeof(typename block::block_metadata_t));
}
/**
 * @return the first MAX_ALIGNMENT aligned address at or after head's write offset where `bytes`
 *         bytes fit into the head block, or nullptr when there is no head or not enough room
 */
void* get_aligned_pointer(blt::size_t bytes) noexcept
{
    if (head == nullptr)
        return nullptr;

    // std::align adjusts these two locals only, the block itself is left untouched
    void* candidate = head->metadata.offset;
    blt::size_t space = head->remaining_bytes_in_block();
    return std::align(MAX_ALIGNMENT, bytes, candidate, space);
}
/** Allocates a block for `size` bytes, links it after the current head and makes it the new head. */
void push_block(blt::size_t size) noexcept
{
    block* const fresh = allocate_block(size);
    if (head != nullptr)
    {
        head->metadata.next = fresh;
        fresh->metadata.prev = head;
    }
    head = fresh;
}
/**
 * Rounds `bytes` up to a multiple of PAGE_SIZE, returning at least one page.
 * A request that already is an exact multiple of PAGE_SIZE is returned unchanged
 * instead of being padded with an additional, unused page.
 */
static size_t to_nearest_page_size(blt::size_t bytes) noexcept
{
    constexpr static blt::size_t MASK = ~(PAGE_SIZE - 1);
    const blt::size_t rounded = (bytes + (PAGE_SIZE - 1)) & MASK;
    // a zero byte request still needs room for the block header
    return rounded == 0 ? PAGE_SIZE : rounded;
}
/**
 * Allocates page aligned memory large enough for `bytes` (rounded by to_nearest_page_size)
 * and constructs a block header at its start.
 * Aborts through BLT_ABORT when the allocation fails; this function is noexcept and its callers
 * have no way to recover.
 * @return pointer to the constructed block, to be released with free_block
 */
static block* allocate_block(blt::size_t bytes) noexcept
{
    auto size = to_nearest_page_size(bytes);
    auto* data = std::aligned_alloc(PAGE_SIZE, size);
    //auto* data = get_allocator().allocate(size);
    // constructing a block at a null address would be undefined behaviour
    if (data == nullptr)
        BLT_ABORT("Failed to allocate memory for a stack block!");
    return new(data) block{size};
}
/** Frees `current` and every block reachable from it through prev links. */
static void free_chain(block* current) noexcept
{
    for (block* cursor = current; cursor != nullptr;)
    {
        // read the predecessor before the block is released
        block* const earlier = cursor->metadata.prev;
        free_block(cursor);
        //get_allocator().deallocate(cursor);
        cursor = earlier;
    }
}
/** Releases the memory of a single block with std::free; links to other blocks are not touched. */
static void free_block(block* ptr) noexcept
{
std::free(ptr);
}
inline bool move_back() noexcept
{
auto old = head;
head = head->metadata.prev;
if (head == nullptr)
{
head = old;
return false;
}
return true;
}
/**
 * Locates the position `bytes` bytes below the top of `stack`.
 * Walks from head through prev links, skipping blocks that lie entirely inside the requested
 * range. Aborts through BLT_ABORT if the stack holds fewer than `bytes` bytes.
 * @return the block containing the first byte, how many bytes to take from that block, and the
 *         address of the first byte
 */
[[nodiscard]] inline static copy_start_point get_start_from_bytes(const stack_allocator_old& stack, blt::size_t bytes)
{
    block* cursor = stack.head;
    auto pending = static_cast<blt::ptrdiff_t>(bytes);
    blt::u8* first_byte = nullptr;

    while (pending > 0)
    {
        if (cursor == nullptr)
        {
            BLT_WARN("This stack doesn't contain enough space to copy %ld bytes!", bytes);
            BLT_WARN_STREAM << "State: " << stack.size() << "\n";
            BLT_ABORT("Stack doesn't contain enough data for this copy operation!");
        }
        const auto used = cursor->used_bytes_in_block();
        if (used >= pending)
        {
            // when the block is needed in full this address is the start of its buffer
            first_byte = cursor->metadata.offset - pending;
            break;
        }
        pending -= used;
        cursor = cursor->metadata.prev;
    }
    return copy_start_point{cursor, pending, first_byte};
}
private:
block* head = nullptr;
};
} }
#endif //BLT_GP_STACK_H #endif //BLT_GP_STACK_H

41
main_branch.txt Normal file
View File

@ -0,0 +1,41 @@
Performance counter stats for './cmake-build-release/blt-symbolic-regression-example' (30 runs):
81,986,993,284 branches ( +- 15.89% ) (19.93%)
194,632,894 branch-misses # 0.24% of all branches ( +- 21.10% ) (19.84%)
32,561,539 cache-misses # 0.89% of all cache refs ( +- 10.21% ) (19.95%)
3,645,509,810 cache-references ( +- 15.93% ) (20.11%)
169,957,442,648 cycles ( +- 15.85% ) (20.26%)
426,558,894,577 instructions # 2.51 insn per cycle ( +- 16.24% ) (20.29%)
0 alignment-faults
9,103 cgroup-switches ( +- 13.62% )
52,586 faults ( +- 5.74% )
1,823,320,688 ns duration_time ( +- 12.76% )
41,213,439,537 ns user_time ( +- 3.68% )
219,435,124 ns system_time ( +- 2.44% )
132,928,139,347 L1-dcache-loads ( +- 15.55% ) (20.40%)
2,559,138,346 L1-dcache-load-misses # 1.93% of all L1-dcache accesses ( +- 15.53% ) (20.37%)
852,474,938 L1-dcache-prefetches ( +- 19.61% ) (20.44%)
1,035,909,753 L1-icache-loads ( +- 11.73% ) (20.45%)
1,451,589 L1-icache-load-misses # 0.14% of all L1-icache accesses ( +- 13.61% ) (20.50%)
37,722,800 dTLB-loads ( +- 14.93% ) (20.52%)
4,119,243 dTLB-load-misses # 10.92% of all dTLB cache accesses ( +- 10.99% ) (20.55%)
1,318,136 iTLB-loads ( +- 20.32% ) (20.51%)
367,939 iTLB-load-misses # 27.91% of all iTLB cache accesses ( +- 12.34% ) (20.42%)
2,730,214,946 l2_request_g1.all_no_prefetch ( +- 15.32% ) (20.43%)
52,586 page-faults ( +- 5.74% )
52,583 page-faults:u ( +- 5.75% )
3 page-faults:k ( +- 3.96% )
132,786,226,560 L1-dcache-loads ( +- 15.54% ) (20.33%)
2,581,181,694 L1-dcache-load-misses # 1.94% of all L1-dcache accesses ( +- 15.34% ) (20.26%)
<not supported> LLC-loads
<not supported> LLC-load-misses
1,021,814,075 L1-icache-loads ( +- 11.67% ) (20.19%)
1,376,958 L1-icache-load-misses # 0.13% of all L1-icache accesses ( +- 13.76% ) (20.09%)
38,065,494 dTLB-loads ( +- 14.76% ) (20.09%)
4,174,010 dTLB-load-misses # 11.06% of all dTLB cache accesses ( +- 10.90% ) (20.14%)
1,407,386 iTLB-loads ( +- 20.45% ) (20.09%)
338,781 iTLB-load-misses # 25.70% of all iTLB cache accesses ( +- 12.61% ) (20.05%)
873,873,406 L1-dcache-prefetches ( +- 19.41% ) (20.00%)
<not supported> L1-dcache-prefetch-misses
1.823 +- 0.233 seconds time elapsed ( +- 12.76% )