Reduce allocations and make evaluation faster

shared
Brett 2024-08-22 02:10:55 -04:00
parent 7aaad70132
commit 95460e7bf1
6 changed files with 48 additions and 10 deletions

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25) cmake_minimum_required(VERSION 3.25)
project(blt-gp VERSION 0.1.19) project(blt-gp VERSION 0.1.20)
include(CTest) include(CTest)

View File

@@ -113,6 +113,7 @@ int main()
while (!program.should_terminate()) while (!program.should_terminate())
{ {
BLT_TRACE("------------{Begin Generation %ld}------------", program.get_current_generation()); BLT_TRACE("------------{Begin Generation %ld}------------", program.get_current_generation());
BLT_TRACE("Creating next generation");
BLT_START_INTERVAL("Symbolic Regression", "Gen"); BLT_START_INTERVAL("Symbolic Regression", "Gen");
program.create_next_generation(); program.create_next_generation();
BLT_END_INTERVAL("Symbolic Regression", "Gen"); BLT_END_INTERVAL("Symbolic Regression", "Gen");
@@ -148,5 +149,12 @@ int main()
BLT_PRINT_PROFILE("Symbolic Regression", blt::PRINT_CYCLES | blt::PRINT_THREAD | blt::PRINT_WALL); BLT_PRINT_PROFILE("Symbolic Regression", blt::PRINT_CYCLES | blt::PRINT_THREAD | blt::PRINT_WALL);
// BLT_TRACE("Allocations:");
// auto h = static_cast<blt::ptrdiff_t>(blt::gp::hello.load());
// auto u = static_cast<blt::ptrdiff_t>(blt::gp::unhello.load());
// BLT_TRACE("Allocated: %ld", h);
// BLT_TRACE("Deallocated: %ld", u);
// BLT_TRACE("Ratio: %lf Difference: %ld", static_cast<double>(h) / static_cast<double>(u), std::abs(h - u));
return 0; return 0;
} }

View File

@@ -55,7 +55,7 @@ namespace blt::gp
class operator_storage_test; class operator_storage_test;
// context*, read stack, write stack // context*, read stack, write stack
using operator_func_t = std::function<void(void*, stack_allocator&, stack_allocator&)>; using operator_func_t = std::function<void(void*, stack_allocator&, stack_allocator&)>;
using eval_func_t = std::function<evaluation_context(const tree_t& tree, void* context)>; using eval_func_t = std::function<evaluation_context&(const tree_t& tree, void* context)>;
// debug function, // debug function,
using print_func_t = std::function<void(std::ostream&, stack_allocator&)>; using print_func_t = std::function<void(std::ostream&, stack_allocator&)>;

View File

@@ -115,12 +115,14 @@ namespace blt::gp
for (auto v : sizes) for (auto v : sizes)
largest = std::max(v, largest); largest = std::max(v, largest);
storage.eval_func = [&operators..., largest](const tree_t& tree, void* context) { storage.eval_func = [&operators..., largest](const tree_t& tree, void* context) -> evaluation_context& {
const auto& ops = tree.get_operations(); const auto& ops = tree.get_operations();
const auto& vals = tree.get_values(); const auto& vals = tree.get_values();
evaluation_context results{}; static thread_local evaluation_context results{};
results.values.reset();
results.values.reserve(largest); results.values.reserve(largest);
// BLT_DEBUG("%ld stored %ld", largest, results.values.internal_storage_size());
blt::size_t total_so_far = 0; blt::size_t total_so_far = 0;
@@ -261,7 +263,7 @@ namespace blt::gp
storage.names.push_back(op.get_name()); storage.names.push_back(op.get_name());
if (op.is_ephemeral()) if (op.is_ephemeral())
storage.static_types.insert(operator_id); storage.static_types.insert(operator_id);
return total_size_required; return total_size_required * 2;
} }
template<typename T> template<typename T>

View File

@@ -20,7 +20,7 @@
#define BLT_GP_STACK_H #define BLT_GP_STACK_H
#include <blt/std/types.h> #include <blt/std/types.h>
#include <blt/std/atomic_allocator.h> #include <blt/std/bump_allocator.h>
#include <blt/std/assert.h> #include <blt/std/assert.h>
#include <blt/std/logging.h> #include <blt/std/logging.h>
#include <blt/std/allocator.h> #include <blt/std/allocator.h>
@@ -43,16 +43,25 @@ namespace blt::gp
BLT_META_MAKE_FUNCTION_CHECK(drop); BLT_META_MAKE_FUNCTION_CHECK(drop);
} }
// inline std::atomic_uint64_t hello = 0;
// inline std::atomic_uint64_t unhello = 0;
class aligned_allocator class aligned_allocator
{ {
public: public:
void* allocate(blt::size_t bytes) // NOLINT void* allocate(blt::size_t bytes) // NOLINT
{ {
// hello.fetch_add(1, std::memory_order_relaxed);
// BLT_TRACE("Allocating %ld bytes", bytes);
return std::aligned_alloc(8, bytes); return std::aligned_alloc(8, bytes);
} }
void deallocate(void* ptr, blt::size_t) // NOLINT void deallocate(void* ptr, blt::size_t) // NOLINT
{ {
// if (ptr == nullptr)
// return;
// unhello.fetch_add(1, std::memory_order_relaxed);
// BLT_TRACE("Deallocating %ld bytes", bytes);
std::free(ptr); std::free(ptr);
} }
}; };
@@ -122,7 +131,6 @@ namespace blt::gp
~stack_allocator() ~stack_allocator()
{ {
//std::free(data_);
get_allocator().deallocate(data_, size_); get_allocator().deallocate(data_, size_);
} }
@@ -266,13 +274,33 @@ namespace blt::gp
void reserve(blt::size_t bytes) void reserve(blt::size_t bytes)
{ {
if (bytes > size_) if (bytes > size_)
expand(bytes); expand_raw(bytes);
}
[[nodiscard]] blt::size_t stored() const
{
return bytes_stored;
}
[[nodiscard]] blt::size_t internal_storage_size() const
{
return size_;
}
void reset()
{
bytes_stored = 0;
} }
private: private:
void expand(blt::size_t bytes) void expand(blt::size_t bytes)
{ {
bytes = to_nearest_page_size(bytes); bytes = to_nearest_page_size(bytes);
expand_raw(bytes);
}
void expand_raw(blt::size_t bytes)
{
auto new_data = static_cast<blt::u8*>(get_allocator().allocate(bytes)); auto new_data = static_cast<blt::u8*>(get_allocator().allocate(bytes));
if (bytes_stored > 0) if (bytes_stored > 0)
std::memcpy(new_data, data_, bytes_stored); std::memcpy(new_data, data_, bytes_stored);

View File

@@ -84,7 +84,7 @@ namespace blt::gp
return values; return values;
} }
evaluation_context evaluate(void* context) const evaluation_context& evaluate(void* context) const
{ {
return (*func)(*this, context); return (*func)(*this, context);
} }
@@ -115,7 +115,7 @@ namespace blt::gp
template<typename T> template<typename T>
T get_evaluation_value(void* context) T get_evaluation_value(void* context)
{ {
auto results = evaluate(context); auto& results = evaluate(context);
return results.values.pop<T>(); return results.values.pop<T>();
} }