From 55f5b5bc05a460cc2fd12d2e2df50636111e5f78 Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 2 Sep 2024 01:55:15 -0400 Subject: [PATCH 1/7] thread_local stats --- CMakeLists.txt | 2 +- examples/symbolic_regression.cpp | 6 +- include/blt/gp/fwdecl.h | 2 +- include/blt/gp/program.h | 6 +- include/blt/gp/selection.h | 22 +++--- include/blt/gp/stats.h | 125 ++++++++++++++++++++++++++++--- src/program.cpp | 6 ++ src/stats.cpp | 8 +- src/transformers.cpp | 3 + 9 files changed, 149 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e552fbb..65e3368 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.1.40) +project(blt-gp VERSION 0.1.41) include(CTest) diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp index 44ce977..ce3206a 100644 --- a/examples/symbolic_regression.cpp +++ b/examples/symbolic_regression.cpp @@ -148,9 +148,9 @@ int main() auto mutation_allocations_v = blt::gp::mutation_allocations.get_calls(); auto reproduction_calls_v = blt::gp::reproduction_calls.get_calls(); auto reproduction_allocations_v = blt::gp::reproduction_allocations.get_calls(); - BLT_TRACE("Total Crossover Calls: %ld Bytes %s", crossover_calls_v, blt::byte_convert_t(blt::gp::crossover_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Mutation Calls: %ld Bytes %s", mutation_calls_v, blt::byte_convert_t(blt::gp::mutation_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Reproduction Calls: %ld Bytes %s", reproduction_calls_v, blt::byte_convert_t(blt::gp::reproduction_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Crossover Calls: %ld", crossover_calls_v); + BLT_TRACE("Total Mutation Calls: %ld", mutation_calls_v); + BLT_TRACE("Total Reproduction Calls: %ld", reproduction_calls_v); BLT_TRACE("Total Crossover Allocations: %ld Bytes %s", crossover_allocations_v, blt::byte_convert_t(blt::gp::crossover_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); BLT_TRACE("Total Mutation Allocations: %ld Bytes %s", mutation_allocations_v, blt::byte_convert_t(blt::gp::mutation_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); BLT_TRACE("Total Reproduction Allocations: %ld Bytes %s", reproduction_allocations_v, blt::byte_convert_t(blt::gp::reproduction_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); diff --git a/include/blt/gp/fwdecl.h b/include/blt/gp/fwdecl.h index 5e751e2..7655dd2 100644 --- a/include/blt/gp/fwdecl.h +++ b/include/blt/gp/fwdecl.h @@ -76,7 +76,7 @@ namespace blt::gp template using tracked_vector = std::vector; #endif - + // using operation_vector_t = tracked_vector; // using individual_vector_t = tracked_vector>; // using tree_vector_t = tracked_vector; diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index 8c3c1e3..ff8fb8e 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -367,8 +367,7 @@ namespace blt::gp (*thread_execution_service)(0); #ifdef BLT_TRACK_ALLOCATIONS blt::gp::tracker.stop_measurement(gen_alloc); - BLT_TRACE("Generation Allocated %ld times with a total of %s", gen_alloc.getAllocationDifference(), - blt::byte_convert_t(gen_alloc.getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str()); + gen_alloc.pretty_print("Generation"); #endif } @@ -386,8 +385,7 @@ namespace blt::gp evaluate_fitness_internal(); #ifdef BLT_TRACK_ALLOCATIONS blt::gp::tracker.stop_measurement(fitness_alloc); - BLT_TRACE("Fitness Allocated %ld times with a total of %s", fitness_alloc.getAllocationDifference(), - blt::byte_convert_t(fitness_alloc.getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str()); + fitness_alloc.pretty_print("Fitness"); #endif } diff --git a/include/blt/gp/selection.h b/include/blt/gp/selection.h index 958a676..029e29b 100644 --- a/include/blt/gp/selection.h +++ b/include/blt/gp/selection.h @@ -89,7 +89,7 @@ namespace blt::gp if (random.choice(config.crossover_chance)) { #ifdef BLT_TRACK_ALLOCATIONS - auto state = tracker.start_measurement(); + auto state = tracker.start_measurement_thread_local(); #endif // crossover const tree_t* p1; @@ -100,10 +100,12 @@ namespace blt::gp p2 = &crossover_selection.select(program, current_pop); } while (!config.crossover.get().apply(program, *p1, *p2, c1, *c2)); #ifdef BLT_TRACK_ALLOCATIONS - tracker.stop_measurement(state); - crossover_calls.call(state.getAllocatedByteDifference()); - if (state.getAllocationDifference() != 0) + tracker.stop_measurement_thread_local(state); + crossover_calls.call(); + if (state.getAllocatedByteDifference() != 0) + { crossover_allocations.call(state.getAllocatedByteDifference()); + } #endif return 2; } @@ -112,7 +114,7 @@ namespace blt::gp if (random.choice(config.mutation_chance)) { #ifdef BLT_TRACK_ALLOCATIONS - auto state = tracker.start_measurement(); + auto state = tracker.start_measurement_thread_local(); #endif // mutation const tree_t* p; @@ -121,8 +123,8 @@ namespace blt::gp p = &mutation_selection.select(program, current_pop); } while (!config.mutator.get().apply(program, *p, c1)); #ifdef BLT_TRACK_ALLOCATIONS - tracker.stop_measurement(state); - mutation_calls.call(state.getAllocatedByteDifference()); + tracker.stop_measurement_thread_local(state); + mutation_calls.call(); if (state.getAllocationDifference() != 0) { mutation_allocations.call(state.getAllocatedByteDifference()); @@ -135,13 +137,13 @@ namespace blt::gp if (config.reproduction_chance > 0 && random.choice(config.reproduction_chance)) { #ifdef BLT_TRACK_ALLOCATIONS - auto state = tracker.start_measurement(); + auto state = tracker.start_measurement_thread_local(); #endif // reproduction c1 = reproduction_selection.select(program, current_pop); #ifdef BLT_TRACK_ALLOCATIONS - tracker.stop_measurement(state); - reproduction_calls.call(state.getAllocatedByteDifference()); + tracker.stop_measurement_thread_local(state); + reproduction_calls.call(); if (state.getAllocationDifference() != 0) { reproduction_allocations.call(state.getAllocatedByteDifference()); diff --git a/include/blt/gp/stats.h b/include/blt/gp/stats.h index 9fda502..4a88012 100644 --- a/include/blt/gp/stats.h +++ b/include/blt/gp/stats.h @@ -20,15 +20,60 @@ #define BLT_GP_STATS_H #include -#include +#include +#include +#include #include +#include +#include +#include namespace blt::gp { - class allocation_tracker_t { public: + class tl_t + { + friend allocation_tracker_t; + public: + [[nodiscard]] blt::u64 getAllocations() const + { + return get_map(allocations); + } + + [[nodiscard]] blt::u64 getDeallocations() const + { + return get_map(deallocations); + } + + [[nodiscard]] blt::u64 getAllocatedBytes() const + { + return get_map(allocated_bytes); + } + + [[nodiscard]] blt::u64 getDeallocatedBytes() const + { + return get_map(deallocated_bytes); + } + + [[nodiscard]] blt::u64 getAllocationDifference() const + { + return std::abs(static_cast(getAllocations()) - static_cast(getDeallocations())); + } + + [[nodiscard]] blt::u64 getCurrentlyAllocatedBytes() const + { + return getAllocatedBytes() - getDeallocatedBytes(); + } + + private: + blt::hashmap_t> allocations; + blt::hashmap_t> deallocations; + blt::hashmap_t> allocated_bytes; + blt::hashmap_t> deallocated_bytes; + }; + struct allocation_data_t { blt::u64 start_allocations = 0; @@ -60,18 +105,33 @@ namespace blt::gp { return end_deallocated_bytes - start_deallocated_bytes; } + + void pretty_print(const std::string& name) const; }; + void reserve() + { + std::scoped_lock lock(mutex); + tl.allocations[std::this_thread::get_id()] = std::make_unique(); + tl.deallocations[std::this_thread::get_id()] = std::make_unique(); + tl.allocated_bytes[std::this_thread::get_id()] = std::make_unique(); + tl.deallocated_bytes[std::this_thread::get_id()] = std::make_unique(); + } + void allocate(blt::size_t bytes) { allocations++; allocated_bytes += bytes; + add_map(tl.allocations, 1); + add_map(tl.allocated_bytes, bytes); } void deallocate(blt::size_t bytes) { deallocations++; deallocated_bytes += bytes; + add_map(tl.deallocations, 1); + add_map(tl.deallocated_bytes, bytes); } [[nodiscard]] blt::u64 getAllocations() const @@ -104,29 +164,73 @@ namespace blt::gp return getAllocatedBytes() - getDeallocatedBytes(); } + allocation_tracker_t::tl_t& get_thread_local() + { + return tl; + } + [[nodiscard]] allocation_data_t start_measurement() const { allocation_data_t data{}; - data.start_allocations = allocations; - data.start_deallocations = deallocations; - data.start_allocated_bytes = allocated_bytes; - data.start_deallocated_bytes = deallocated_bytes; + data.start_allocations = getAllocations(); + data.start_deallocations = getDeallocations(); + data.start_allocated_bytes = getAllocatedBytes(); + data.start_deallocated_bytes = getDeallocatedBytes(); + return data; + } + + [[nodiscard]] allocation_data_t start_measurement_thread_local() const + { + allocation_data_t data{}; + data.start_allocations = tl.getAllocations(); + data.start_deallocations = tl.getDeallocations(); + data.start_allocated_bytes = tl.getAllocatedBytes(); + data.start_deallocated_bytes = tl.getDeallocatedBytes(); return data; } void stop_measurement(allocation_data_t& data) const { - data.end_allocations = allocations; - data.end_deallocations = deallocations; - data.end_allocated_bytes = allocated_bytes; - data.end_deallocated_bytes = deallocated_bytes; + data.end_allocations = getAllocations(); + data.end_deallocations = getDeallocations(); + data.end_allocated_bytes = getAllocatedBytes(); + data.end_deallocated_bytes = getDeallocatedBytes(); + } + + void stop_measurement_thread_local(allocation_data_t& data) const + { + data.end_allocations = tl.getAllocations(); + data.end_deallocations = tl.getDeallocations(); + data.end_allocated_bytes = tl.getAllocatedBytes(); + data.end_deallocated_bytes = tl.getDeallocatedBytes(); } private: + static void add_map(blt::hashmap_t>& map, blt::u64 value) + { + auto l = map.find(std::this_thread::get_id()); + if (l == map.end()) + BLT_ABORT("Thread doesn't exist inside this map!"); + auto& v = *l->second; + v += value; + } + + static blt::u64 get_map(const blt::hashmap_t>& map) + { + auto l = map.find(std::this_thread::get_id()); + if (l == map.end()) + BLT_ABORT("Thread doesn't exist inside this map!"); + return *l->second; + } + + tl_t tl; + std::atomic_uint64_t allocations = 0; std::atomic_uint64_t deallocations = 0; std::atomic_uint64_t allocated_bytes = 0; std::atomic_uint64_t deallocated_bytes = 0; + + std::mutex mutex; }; class call_tracker_t @@ -190,7 +294,6 @@ namespace blt::gp std::atomic_uint64_t primary_calls = 0; std::atomic_uint64_t secondary_value = 0; }; - } #endif //BLT_GP_STATS_H diff --git a/src/program.cpp b/src/program.cpp index 1f310d3..ce96d53 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -58,12 +58,18 @@ namespace blt::gp void gp_program::create_threads() { +#ifdef BLT_TRACK_ALLOCATIONS + tracker.reserve(); +#endif if (config.threads == 0) config.set_thread_count(std::thread::hardware_concurrency()); // main thread is thread0 for (blt::size_t i = 1; i < config.threads; i++) { thread_helper.threads.emplace_back(new std::thread([i, this]() { +#ifdef BLT_TRACK_ALLOCATIONS + tracker.reserve(); +#endif std::function* execution_function = nullptr; while (!should_thread_terminate()) { diff --git a/src/stats.cpp b/src/stats.cpp index d5be6a7..2ee9322 100644 --- a/src/stats.cpp +++ b/src/stats.cpp @@ -17,8 +17,14 @@ */ #include #include +#include "blt/std/format.h" namespace blt::gp { - + + void allocation_tracker_t::allocation_data_t::pretty_print(const std::string& name) const + { + BLT_TRACE("%s Allocations: %ld times with a total of %s", name.c_str(), getAllocationDifference(), + blt::byte_convert_t(getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str()); + } } \ No newline at end of file diff --git a/src/transformers.cpp b/src/transformers.cpp index 5dc65ea..ab2b7f9 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -105,6 +105,9 @@ namespace blt::gp auto copy_ptr_c1 = get_thread_pointer_for_size(c1_total); auto copy_ptr_c2 = get_thread_pointer_for_size(c2_total); + c1_stack.reserve(c1_stack.bytes_in_head() - c1_stack_for_bytes + c2_stack_for_bytes); + c2_stack.reserve(c2_stack.bytes_in_head() - c2_stack_for_bytes + c1_stack_for_bytes); + c1_stack.copy_to(copy_ptr_c1, c1_total); c1_stack.pop_bytes(c1_total); From 433002bb8c5a3ca933f8d8c84686d8dba4a4539e Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 2 Sep 2024 03:08:03 -0400 Subject: [PATCH 2/7] track peak allocations --- CMakeLists.txt | 4 +-- examples/symbolic_regression.cpp | 23 +++++++++------ include/blt/gp/selection.h | 3 ++ include/blt/gp/stats.h | 48 ++++++++++++++++++++++++++++---- src/program.cpp | 4 +++ 5 files changed, 66 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65e3368..81a0333 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.1.41) +project(blt-gp VERSION 0.1.42) include(CTest) @@ -16,7 +16,7 @@ set(CMAKE_CXX_STANDARD 17) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -#SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") if (NOT TARGET BLT) add_subdirectory(lib/blt) diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp index ce3206a..66fc8d9 100644 --- a/examples/symbolic_regression.cpp +++ b/examples/symbolic_regression.cpp @@ -140,20 +140,27 @@ int main() BLT_PRINT_PROFILE("Symbolic Regression", blt::PRINT_CYCLES | blt::PRINT_THREAD | blt::PRINT_WALL); #ifdef BLT_TRACK_ALLOCATIONS - BLT_TRACE("Total Allocations: %ld times with a total of %s", blt::gp::tracker.getAllocations(), - blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Allocations: %ld times with a total of %s, peak allocated bytes %s", blt::gp::tracker.getAllocations(), + blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str(), + blt::byte_convert_t(blt::gp::tracker.getPeakAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str()); auto crossover_calls_v = blt::gp::crossover_calls.get_calls(); auto crossover_allocations_v = blt::gp::crossover_allocations.get_calls(); auto mutation_calls_v = blt::gp::mutation_calls.get_calls(); auto mutation_allocations_v = blt::gp::mutation_allocations.get_calls(); auto reproduction_calls_v = blt::gp::reproduction_calls.get_calls(); auto reproduction_allocations_v = blt::gp::reproduction_allocations.get_calls(); - BLT_TRACE("Total Crossover Calls: %ld", crossover_calls_v); - BLT_TRACE("Total Mutation Calls: %ld", mutation_calls_v); - BLT_TRACE("Total Reproduction Calls: %ld", reproduction_calls_v); - BLT_TRACE("Total Crossover Allocations: %ld Bytes %s", crossover_allocations_v, blt::byte_convert_t(blt::gp::crossover_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Mutation Allocations: %ld Bytes %s", mutation_allocations_v, blt::byte_convert_t(blt::gp::mutation_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Reproduction Allocations: %ld Bytes %s", reproduction_allocations_v, blt::byte_convert_t(blt::gp::reproduction_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Crossover Calls: %ld Peak Bytes Allocated %s", crossover_calls_v, + blt::byte_convert_t(blt::gp::crossover_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Mutation Calls: %ld Peak Bytes Allocated %s", mutation_calls_v, + blt::byte_convert_t(blt::gp::mutation_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Reproduction Calls: %ld Peak Bytes Allocated %s", reproduction_calls_v, + blt::byte_convert_t(blt::gp::reproduction_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Crossover Allocations: %ld Bytes %s", crossover_allocations_v, + blt::byte_convert_t(blt::gp::crossover_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Mutation Allocations: %ld Bytes %s", mutation_allocations_v, + blt::byte_convert_t(blt::gp::mutation_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Reproduction Allocations: %ld Bytes %s", reproduction_allocations_v, + blt::byte_convert_t(blt::gp::reproduction_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); BLT_TRACE("Percent Crossover calls allocate? %lf%%", static_cast(crossover_allocations_v) / static_cast(crossover_calls_v == 0 ? 1 : crossover_calls_v) * 100); BLT_TRACE("Percent Mutation calls allocate? %lf%%", diff --git a/include/blt/gp/selection.h b/include/blt/gp/selection.h index 029e29b..38c1f81 100644 --- a/include/blt/gp/selection.h +++ b/include/blt/gp/selection.h @@ -102,6 +102,7 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS tracker.stop_measurement_thread_local(state); crossover_calls.call(); + crossover_calls.set_value(std::max(crossover_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocatedByteDifference() != 0) { crossover_allocations.call(state.getAllocatedByteDifference()); @@ -125,6 +126,7 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS tracker.stop_measurement_thread_local(state); mutation_calls.call(); + mutation_calls.set_value(std::max(mutation_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocationDifference() != 0) { mutation_allocations.call(state.getAllocatedByteDifference()); @@ -144,6 +146,7 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS tracker.stop_measurement_thread_local(state); reproduction_calls.call(); + reproduction_calls.set_value(std::max(reproduction_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocationDifference() != 0) { reproduction_allocations.call(state.getAllocatedByteDifference()); diff --git a/include/blt/gp/stats.h b/include/blt/gp/stats.h index 4a88012..d08800b 100644 --- a/include/blt/gp/stats.h +++ b/include/blt/gp/stats.h @@ -27,6 +27,7 @@ #include #include #include +#include namespace blt::gp { @@ -72,6 +73,9 @@ namespace blt::gp blt::hashmap_t> deallocations; blt::hashmap_t> allocated_bytes; blt::hashmap_t> deallocated_bytes; + + std::mutex mutex; + std::condition_variable var; }; struct allocation_data_t @@ -111,17 +115,39 @@ namespace blt::gp void reserve() { - std::scoped_lock lock(mutex); - tl.allocations[std::this_thread::get_id()] = std::make_unique(); - tl.deallocations[std::this_thread::get_id()] = std::make_unique(); - tl.allocated_bytes[std::this_thread::get_id()] = std::make_unique(); - tl.deallocated_bytes[std::this_thread::get_id()] = std::make_unique(); + { + std::scoped_lock lock(tl.mutex); + tl.allocations.insert({std::this_thread::get_id(), std::make_unique()}); + tl.deallocations.insert({std::this_thread::get_id(), std::make_unique()}); + tl.allocated_bytes.insert({std::this_thread::get_id(), std::make_unique()}); + tl.deallocated_bytes.insert({std::this_thread::get_id(), std::make_unique()}); + } + tl.var.notify_all(); + } + + blt::size_t reserved_threads() + { + return tl.allocations.size(); + } + + void await_completion(blt::u64 required_threads) + { + std::unique_lock lock(tl.mutex); + tl.var.wait(lock, [this, required_threads]() { + return reserved_threads() == required_threads; + }); } void allocate(blt::size_t bytes) { allocations++; allocated_bytes += bytes; + + auto diff = getCurrentlyAllocatedBytes(); + auto atomic_val = peak_allocated_bytes.load(std::memory_order_relaxed); + while (diff > atomic_val && + !peak_allocated_bytes.compare_exchange_weak(atomic_val, diff, std::memory_order_relaxed, std::memory_order_relaxed)); + add_map(tl.allocations, 1); add_map(tl.allocated_bytes, bytes); } @@ -164,6 +190,11 @@ namespace blt::gp return getAllocatedBytes() - getDeallocatedBytes(); } + [[nodiscard]] blt::u64 getPeakAllocatedBytes() const + { + return peak_allocated_bytes; + } + allocation_tracker_t::tl_t& get_thread_local() { return tl; @@ -230,7 +261,7 @@ namespace blt::gp std::atomic_uint64_t allocated_bytes = 0; std::atomic_uint64_t deallocated_bytes = 0; - std::mutex mutex; + std::atomic_uint64_t peak_allocated_bytes = 0; }; class call_tracker_t @@ -259,6 +290,11 @@ namespace blt::gp secondary_value += value; } + void set_value(blt::u64 value) + { + secondary_value = value; + } + void call() { primary_calls++; diff --git a/src/program.cpp b/src/program.cpp index ce96d53..e8032a9 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -69,6 +69,7 @@ namespace blt::gp thread_helper.threads.emplace_back(new std::thread([i, this]() { #ifdef BLT_TRACK_ALLOCATIONS tracker.reserve(); + tracker.await_completion(config.threads); #endif std::function* execution_function = nullptr; while (!should_thread_terminate()) @@ -89,5 +90,8 @@ namespace blt::gp } })); } +#ifdef BLT_TRACK_ALLOCATIONS + tracker.await_completion(config.threads); +#endif } } \ No newline at end of file From 921fec9e6b5ddf4d7db076a2bba903839f3401cc Mon Sep 17 00:00:00 2001 From: Brett Date: Mon, 2 Sep 2024 15:41:20 -0400 Subject: [PATCH 3/7] fix thread issue --- CMakeLists.txt | 2 +- dhat.out.293761 | 6 ++-- examples/symbolic_regression.cpp | 53 ++++++++++++++++++------------ include/blt/gp/fwdecl.h | 6 ++++ include/blt/gp/program.h | 56 ++++++++++++++++++++------------ include/blt/gp/selection.h | 4 ++- include/blt/gp/stack.h | 4 +-- include/blt/gp/stats.h | 2 +- lib/blt | 2 +- src/program.cpp | 4 +-- src/transformers.cpp | 2 +- 11 files changed, 87 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 81a0333..743655b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.1.42) +project(blt-gp VERSION 0.1.43) include(CTest) diff --git a/dhat.out.293761 b/dhat.out.293761 index 21dd56d..7579178 100644 --- a/dhat.out.293761 +++ b/dhat.out.293761 @@ -1240,9 +1240,9 @@ ,"0x122836: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12E6ED: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_id const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12284D: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" - ,"0x12EB76: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" + ,"0x12EB76: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info_t const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12294C: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" - ,"0x12E897: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" + ,"0x12E897: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info_t const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12F1FA: void std::vector, std::allocator > >::_M_realloc_insert >(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, std::function&&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12298F: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12F3EB: void std::vector > >, std::allocator > > > >::_M_realloc_insert > > >(__gnu_cxx::__normal_iterator > >*, std::vector > >, std::allocator > > > > >, std::optional > >&&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" @@ -1261,7 +1261,7 @@ ,"0x122F78: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x123052: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12305D: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" - ,"0x12ED14: std::vector >::push_back(blt::gp::operator_info const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" + ,"0x12ED14: std::vector >::push_back(blt::gp::operator_info_t const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x1231C6: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x123321: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x123390: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp index 66fc8d9..ab87400 100644 --- a/examples/symbolic_regression.cpp +++ b/examples/symbolic_regression.cpp @@ -22,6 +22,7 @@ #include #include #include "operations_common.h" +#include "blt/math/averages.h" //static constexpr long SEED = 41912; static const unsigned long SEED = std::random_device()(); @@ -36,13 +37,13 @@ std::array training_cases; blt::gp::prog_config_t config = blt::gp::prog_config_t() .set_initial_min_tree_size(2) .set_initial_max_tree_size(6) - .set_elite_count(200) + .set_elite_count(0) .set_crossover_chance(0.9) .set_mutation_chance(0.1) .set_reproduction_chance(0) .set_max_generations(50) - .set_pop_size(20000) - .set_thread_count(0); + .set_pop_size(1) + .set_thread_count(1); blt::gp::gp_program program{SEED, config}; @@ -143,30 +144,40 @@ int main() BLT_TRACE("Total Allocations: %ld times with a total of %s, peak allocated bytes %s", blt::gp::tracker.getAllocations(), blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str(), blt::byte_convert_t(blt::gp::tracker.getPeakAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("------------------------------------------------------"); + auto evaluation_calls_v = blt::gp::evaluation_calls.get_calls(); + auto evaluation_allocations_v = blt::gp::evaluation_allocations.get_calls(); + BLT_TRACE("Total Evaluation Calls: %ld; Peak Bytes Allocated %s", evaluation_calls_v, + blt::string::bytes_to_pretty(blt::gp::evaluation_calls.get_value()).c_str()); + BLT_TRACE("Total Evaluation Allocations: %ld; Bytes %s; Average %s", evaluation_allocations_v, + blt::string::bytes_to_pretty(blt::gp::evaluation_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::evaluation_allocations.get_value(), evaluation_allocations_v)).c_str()); + BLT_TRACE("Percent Evaluation calls allocate? %lf%%", blt::average(evaluation_allocations_v, evaluation_calls_v) * 100); + BLT_TRACE("------------------------------------------------------"); auto crossover_calls_v = blt::gp::crossover_calls.get_calls(); auto crossover_allocations_v = blt::gp::crossover_allocations.get_calls(); auto mutation_calls_v = blt::gp::mutation_calls.get_calls(); auto mutation_allocations_v = blt::gp::mutation_allocations.get_calls(); auto reproduction_calls_v = blt::gp::reproduction_calls.get_calls(); auto reproduction_allocations_v = blt::gp::reproduction_allocations.get_calls(); - BLT_TRACE("Total Crossover Calls: %ld Peak Bytes Allocated %s", crossover_calls_v, - blt::byte_convert_t(blt::gp::crossover_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Mutation Calls: %ld Peak Bytes Allocated %s", mutation_calls_v, - blt::byte_convert_t(blt::gp::mutation_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Reproduction Calls: %ld Peak Bytes Allocated %s", reproduction_calls_v, - blt::byte_convert_t(blt::gp::reproduction_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Crossover Allocations: %ld Bytes %s", crossover_allocations_v, - blt::byte_convert_t(blt::gp::crossover_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Mutation Allocations: %ld Bytes %s", mutation_allocations_v, - blt::byte_convert_t(blt::gp::mutation_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Reproduction Allocations: %ld Bytes %s", reproduction_allocations_v, - blt::byte_convert_t(blt::gp::reproduction_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Percent Crossover calls allocate? %lf%%", - static_cast(crossover_allocations_v) / static_cast(crossover_calls_v == 0 ? 1 : crossover_calls_v) * 100); - BLT_TRACE("Percent Mutation calls allocate? %lf%%", - static_cast(mutation_allocations_v) / static_cast(mutation_calls_v == 0 ? 1 : mutation_calls_v) * 100); - BLT_TRACE("Percent Reproduction calls allocate? %lf%%", - static_cast(reproduction_allocations_v) / static_cast(reproduction_calls_v == 0 ? 1 : reproduction_calls_v) * 100); + BLT_TRACE("Total Crossover Calls: %ld; Peak Bytes Allocated %s", crossover_calls_v, + blt::string::bytes_to_pretty(blt::gp::crossover_calls.get_value()).c_str()); + BLT_TRACE("Total Mutation Calls: %ld; Peak Bytes Allocated %s", mutation_calls_v, + blt::string::bytes_to_pretty(blt::gp::mutation_calls.get_value()).c_str()); + BLT_TRACE("Total Reproduction Calls: %ld; Peak Bytes Allocated %s", reproduction_calls_v, + blt::string::bytes_to_pretty(blt::gp::reproduction_calls.get_value()).c_str()); + BLT_TRACE("Total Crossover Allocations: %ld; Bytes %s; Average %s", crossover_allocations_v, + blt::string::bytes_to_pretty(blt::gp::crossover_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::crossover_allocations.get_value(), crossover_allocations_v)).c_str()); + BLT_TRACE("Total Mutation Allocations: %ld; Bytes %s; Average %s", mutation_allocations_v, + blt::string::bytes_to_pretty(blt::gp::mutation_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::mutation_allocations.get_value(), mutation_allocations_v)).c_str()); + BLT_TRACE("Total Reproduction Allocations: %ld; Bytes %s; Average %s", reproduction_allocations_v, + blt::string::bytes_to_pretty(blt::gp::reproduction_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::reproduction_allocations.get_value(), reproduction_allocations_v)).c_str()); + BLT_TRACE("Percent Crossover calls allocate? %lf%%", blt::average(crossover_allocations_v, crossover_calls_v) * 100); + BLT_TRACE("Percent Mutation calls allocate? %lf%%", blt::average(mutation_allocations_v, mutation_calls_v) * 100); + BLT_TRACE("Percent Reproduction calls allocate? %lf%%", blt::average(reproduction_allocations_v, reproduction_calls_v) * 100); #endif return 0; diff --git a/include/blt/gp/fwdecl.h b/include/blt/gp/fwdecl.h index 7655dd2..e0cea23 100644 --- a/include/blt/gp/fwdecl.h +++ b/include/blt/gp/fwdecl.h @@ -30,12 +30,18 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS inline allocation_tracker_t tracker; + + // population gen specifics inline call_tracker_t crossover_calls; inline call_tracker_t mutation_calls; inline call_tracker_t reproduction_calls; inline call_tracker_t crossover_allocations; inline call_tracker_t mutation_allocations; inline call_tracker_t reproduction_allocations; + + // for evaluating fitness + inline call_tracker_t evaluation_calls; + inline call_tracker_t evaluation_allocations; #endif class gp_program; diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index ff8fb8e..b3b574b 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -67,7 +67,7 @@ namespace blt::gp } }; - struct operator_info + struct operator_info_t { // types of the arguments tracked_vector argument_types; @@ -79,6 +79,12 @@ namespace blt::gp detail::operator_func_t func; }; + struct operator_metadata_t + { + blt::size_t arg_size_bytes = 0; + blt::size_t return_size_bytes = 0; + }; + struct program_operator_storage_t { // indexed from return TYPE ID, returns index of operator @@ -87,7 +93,8 @@ namespace blt::gp blt::expanding_buffer>> operators_ordered_terminals; // indexed from OPERATOR ID (operator number) blt::hashset_t ephemeral_leaf_operators; - tracked_vector operators; + tracked_vector operators; + tracked_vector operator_metadata; tracked_vector print_funcs; tracked_vector destroy_funcs; tracked_vector> names; @@ -110,11 +117,9 @@ namespace blt::gp template program_operator_storage_t& build(Operators& ... operators) { - tracked_vector sizes; - (sizes.push_back(add_operator(operators)), ...); blt::size_t largest = 0; - for (auto v : sizes) - largest = std::max(v, largest); + operator_metadata_t meta; + ((meta = add_operator(operators), largest = std::max(std::max(meta.arg_size_bytes, meta.return_size_bytes), largest)), ...); storage.eval_func = [&operators..., largest](const tree_t& tree, void* context) -> evaluation_context& { const auto& ops = tree.get_operations(); @@ -214,14 +219,18 @@ namespace blt::gp (storage.system.register_type(), ...); storage.system.register_type(); - auto total_size_required = stack_allocator::aligned_size(sizeof(Return)); - ((total_size_required += stack_allocator::aligned_size(sizeof(Args))), ...); + operator_metadata_t meta; + if constexpr (sizeof...(Args) != 0) + { + meta.arg_size_bytes = (stack_allocator::aligned_size(sizeof(Args)) + ...); + } + meta.return_size_bytes = sizeof(Return); auto return_type_id = storage.system.get_type().id(); auto operator_id = blt::gp::operator_id(storage.operators.size()); op.id = operator_id; - operator_info info; + operator_info_t info; if constexpr (sizeof...(Args) > 0) { @@ -240,6 +249,7 @@ namespace blt::gp BLT_ASSERT(info.argc.argc_context - info.argc.argc <= 1 && "Cannot pass multiple context as arguments!"); storage.operators.push_back(info); + storage.operator_metadata.push_back(meta); storage.print_funcs.push_back([&op](std::ostream& out, stack_allocator& stack) { if constexpr (blt::meta::is_streamable_v) { @@ -267,11 +277,11 @@ namespace blt::gp storage.names.push_back(op.get_name()); if (op.is_ephemeral()) storage.ephemeral_leaf_operators.insert(operator_id); - return total_size_required * 2; + return meta; } template - void add_non_context_argument(decltype(operator_info::argument_types)& types) + void add_non_context_argument(decltype(operator_info_t::argument_types)& types) { if constexpr (!std::is_same_v>) { @@ -386,6 +396,12 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS blt::gp::tracker.stop_measurement(fitness_alloc); fitness_alloc.pretty_print("Fitness"); + evaluation_calls.call(); + evaluation_calls.set_value(std::max(evaluation_calls.get_value(), fitness_alloc.getAllocatedByteDifference())); + if (fitness_alloc.getAllocatedByteDifference() > 0) + { + evaluation_allocations.call(fitness_alloc.getAllocatedByteDifference()); + } #endif } @@ -475,9 +491,7 @@ namespace blt::gp mutation_selection.pre_process(*this, current_pop); reproduction_selection.pre_process(*this, current_pop); - perform_elitism(args, next_pop); - - blt::size_t start = config.elites; + blt::size_t start = perform_elitism(args, next_pop); while (start < config.population_size) { @@ -566,10 +580,8 @@ namespace blt::gp mutation_selection.pre_process(*this, current_pop); if (&crossover_selection != &reproduction_selection) reproduction_selection.pre_process(*this, current_pop); - - perform_elitism(args, next_pop); - - thread_helper.next_gen_left -= config.elites; + auto elite_amount = perform_elitism(args, next_pop); + thread_helper.next_gen_left -= elite_amount; } thread_helper.barrier.wait(); @@ -653,7 +665,7 @@ namespace blt::gp return storage.system; } - [[nodiscard]] inline operator_info& get_operator_info(operator_id id) + [[nodiscard]] inline operator_info_t& get_operator_info(operator_id id) { return storage.operators[id]; } @@ -723,8 +735,10 @@ namespace blt::gp return a.second > b.second; }); - for (blt::size_t i = 0; i < size; i++) + for (blt::size_t i = 0; i < std::min(size, config.population_size); i++) arr[i] = values[i].first; + for (blt::size_t i = std::min(size, config.population_size); i < size; i++) + arr[i] = 0; return arr; } @@ -791,7 +805,7 @@ namespace blt::gp struct concurrency_storage { - tracked_vector> threads; + std::vector> threads; std::mutex thread_function_control{}; std::condition_variable thread_function_condition{}; diff --git a/include/blt/gp/selection.h b/include/blt/gp/selection.h index 38c1f81..7c4c67b 100644 --- a/include/blt/gp/selection.h +++ b/include/blt/gp/selection.h @@ -41,7 +41,7 @@ namespace blt::gp constexpr inline auto perform_elitism = [](const selector_args& args, population_t& next_pop) { auto& [program, current_pop, current_stats, config, random] = args; - if (config.elites > 0) + if (config.elites > 0 && current_pop.get_individuals().size() >= config.elites) { static thread_local tracked_vector> values; values.clear(); @@ -70,7 +70,9 @@ namespace blt::gp for (blt::size_t i = 0; i < config.elites; i++) next_pop.get_individuals()[i].copy_fast(current_pop.get_individuals()[values[i].first].tree); + return config.elites; } + return 0ul; }; template diff --git a/include/blt/gp/stack.h b/include/blt/gp/stack.h index 6ee7e0a..2a73404 100644 --- a/include/blt/gp/stack.h +++ b/include/blt/gp/stack.h @@ -125,7 +125,7 @@ namespace blt::gp { if (bytes == 0) return; - if (size_ < bytes + bytes_stored) + if (bytes + bytes_stored >= size_) expand(bytes + size_); std::memcpy(data_ + bytes_stored, stack.data_ + (stack.bytes_stored - bytes), bytes); bytes_stored += bytes; @@ -135,7 +135,7 @@ namespace blt::gp { if (bytes == 0 || data == nullptr) return; - if (size_ < bytes + bytes_stored) + if (bytes + bytes_stored >= size_) expand(bytes + size_); std::memcpy(data_ + bytes_stored, data, bytes); bytes_stored += bytes; diff --git a/include/blt/gp/stats.h b/include/blt/gp/stats.h index d08800b..5d87e02 100644 --- a/include/blt/gp/stats.h +++ b/include/blt/gp/stats.h @@ -130,7 +130,7 @@ namespace blt::gp return tl.allocations.size(); } - void await_completion(blt::u64 required_threads) + void await_thread_loading_complete(blt::u64 required_threads) { std::unique_lock lock(tl.mutex); tl.var.wait(lock, [this, required_threads]() { diff --git a/lib/blt b/lib/blt index ab482f1..a7645d9 160000 --- a/lib/blt +++ b/lib/blt @@ -1 +1 @@ -Subproject commit ab482f1a1c5782bd3501428f26c02f0bb4729946 +Subproject commit a7645d9ddec57ecaad525b48a30f8001adcf75e8 diff --git a/src/program.cpp b/src/program.cpp index e8032a9..248d658 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -69,7 +69,7 @@ namespace blt::gp thread_helper.threads.emplace_back(new std::thread([i, this]() { #ifdef BLT_TRACK_ALLOCATIONS tracker.reserve(); - tracker.await_completion(config.threads); + tracker.await_thread_loading_complete(config.threads); #endif std::function* execution_function = nullptr; while (!should_thread_terminate()) @@ -91,7 +91,7 @@ namespace blt::gp })); } #ifdef BLT_TRACK_ALLOCATIONS - tracker.await_completion(config.threads); + tracker.await_thread_loading_complete(config.threads); #endif } } \ No newline at end of file diff --git a/src/transformers.cpp b/src/transformers.cpp index ab2b7f9..e2a0685 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -171,7 +171,7 @@ namespace blt::gp blt::size_t attempted_point = 0; const auto& crossover_point_type = program.get_operator_info(c1_ops[crossover_point].id); - operator_info* attempted_point_type = nullptr; + operator_info_t* attempted_point_type = nullptr; blt::size_t counter = 0; do From 06a34d21f1ff05fe33ad1eb60c7f8374c237d34b Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Tue, 3 Sep 2024 17:51:54 -0400 Subject: [PATCH 4/7] silly debugging --- CMakeLists.txt | 2 +- include/blt/gp/fwdecl.h | 4 ++++ include/blt/gp/program.h | 38 ++++++++++++++++++++++++++++++-------- include/blt/gp/stack.h | 6 +++--- include/blt/gp/tree.h | 10 +++++++++- src/tree.cpp | 2 +- 6 files changed, 48 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 743655b..d3f8d6e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.1.43) +project(blt-gp VERSION 0.1.44) include(CTest) diff --git a/include/blt/gp/fwdecl.h b/include/blt/gp/fwdecl.h index e0cea23..2169194 100644 --- a/include/blt/gp/fwdecl.h +++ b/include/blt/gp/fwdecl.h @@ -94,6 +94,7 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS tracker.allocate(bytes); + std::cout << "Hey our aligned allocator allocated " << bytes << " bytes!\n"; #endif return std::aligned_alloc(8, bytes); } @@ -104,6 +105,7 @@ namespace blt::gp return; #ifdef BLT_TRACK_ALLOCATIONS tracker.deallocate(bytes); + std::cout << "[Hey our aligned allocator deallocated " << bytes << " bytes!]\n"; #else (void) bytes; #endif @@ -134,6 +136,7 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS tracker.allocate(n * sizeof(T)); +// std::cout << "Hey our tracked allocator allocated " << (n * sizeof(T)) << " bytes!\n"; #endif return static_cast(std::malloc(n * sizeof(T))); } @@ -147,6 +150,7 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS tracker.deallocate(n * sizeof(T)); +// std::cout << "[Hey our tracked allocator deallocated " << (n * sizeof(T)) << " bytes!]\n"; #else (void) n; #endif diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index b3b574b..3e50669 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -83,6 +83,7 @@ namespace blt::gp { blt::size_t arg_size_bytes = 0; blt::size_t return_size_bytes = 0; + argc_t argc{}; }; struct program_operator_storage_t @@ -118,8 +119,13 @@ namespace blt::gp program_operator_storage_t& build(Operators& ... operators) { blt::size_t largest = 0; + blt::u32 largest_argc = 0; operator_metadata_t meta; - ((meta = add_operator(operators), largest = std::max(std::max(meta.arg_size_bytes, meta.return_size_bytes), largest)), ...); + ((meta = add_operator(operators), largest_argc = std::max(meta.argc.argc, largest_argc), + largest = std::max(std::max(meta.arg_size_bytes, meta.return_size_bytes), largest)), ...); + +// largest = largest * largest_argc; + BLT_TRACE(largest); storage.eval_func = [&operators..., largest](const tree_t& tree, void* context) -> evaluation_context& { const auto& ops = tree.get_operations(); @@ -130,16 +136,30 @@ namespace blt::gp results.values.reserve(largest); blt::size_t total_so_far = 0; + blt::size_t op_pos = 0; for (const auto& operation : blt::reverse_iterate(ops.begin(), ops.end())) { + op_pos++; if (operation.is_value) { + auto cur = tracker.start_measurement(); total_so_far += stack_allocator::aligned_size(operation.type_size); results.values.copy_from(vals.from(total_so_far), stack_allocator::aligned_size(operation.type_size)); + tracker.stop_measurement(cur); + if (cur.getAllocatedByteDifference() > 0) + { + BLT_TRACE("Operator %ld allocated! pos: %ld", operation.id, op_pos); + } continue; } + auto cur = tracker.start_measurement(); call_jmp_table(operation.id, context, results.values, results.values, operators...); + tracker.stop_measurement(cur); + if (cur.getAllocatedByteDifference() > 0) + { + BLT_TRACE("Operator %ld allocated! pos: %ld", operation.id, op_pos); + } } return results; @@ -219,13 +239,6 @@ namespace blt::gp (storage.system.register_type(), ...); storage.system.register_type(); - operator_metadata_t meta; - if constexpr (sizeof...(Args) != 0) - { - meta.arg_size_bytes = (stack_allocator::aligned_size(sizeof(Args)) + ...); - } - meta.return_size_bytes = sizeof(Return); - auto return_type_id = storage.system.get_type().id(); auto operator_id = blt::gp::operator_id(storage.operators.size()); op.id = operator_id; @@ -249,6 +262,15 @@ namespace blt::gp BLT_ASSERT(info.argc.argc_context - info.argc.argc <= 1 && "Cannot pass multiple context as arguments!"); storage.operators.push_back(info); + + operator_metadata_t meta; + if constexpr (sizeof...(Args) != 0) + { + meta.arg_size_bytes = (stack_allocator::aligned_size(sizeof(Args)) + ...); + } + meta.return_size_bytes = sizeof(Return); + meta.argc = info.argc; + storage.operator_metadata.push_back(meta); storage.print_funcs.push_back([&op](std::ostream& out, stack_allocator& stack) { if constexpr (blt::meta::is_streamable_v) diff --git a/include/blt/gp/stack.h b/include/blt/gp/stack.h index 2a73404..4a883ab 100644 --- a/include/blt/gp/stack.h +++ b/include/blt/gp/stack.h @@ -115,7 +115,7 @@ namespace blt::gp { if (stack.empty()) return; - if (size_ < stack.bytes_stored + bytes_stored) + if (stack.bytes_stored + bytes_stored > size_) expand(stack.bytes_stored + size_); std::memcpy(data_ + bytes_stored, stack.data_, stack.bytes_stored); bytes_stored += stack.bytes_stored; @@ -125,7 +125,7 @@ namespace blt::gp { if (bytes == 0) return; - if (bytes + bytes_stored >= size_) + if (bytes + bytes_stored > size_) expand(bytes + size_); std::memcpy(data_ + bytes_stored, stack.data_ + (stack.bytes_stored - bytes), bytes); bytes_stored += bytes; @@ -135,7 +135,7 @@ namespace blt::gp { if (bytes == 0 || data == nullptr) return; - if (bytes + bytes_stored >= size_) + if (bytes + bytes_stored > size_) expand(bytes + size_); std::memcpy(data_ + bytes_stored, data, bytes); bytes_stored += bytes; diff --git a/include/blt/gp/tree.h b/include/blt/gp/tree.h index f25ba60..0272a9c 100644 --- a/include/blt/gp/tree.h +++ b/include/blt/gp/tree.h @@ -118,7 +118,14 @@ namespace blt::gp evaluation_context& evaluate(void* context) const { - return (*func)(*this, context); + auto cur = tracker.start_measurement(); + auto& v = (*func)(*this, context); + tracker.stop_measurement(cur); + if (cur.getAllocatedByteDifference() > 0) + { + print(*program, std::cout, false, true, false); + } + return v; } blt::size_t get_depth(gp_program& program); @@ -194,6 +201,7 @@ namespace blt::gp tracked_vector operations; blt::gp::stack_allocator values; detail::eval_func_t* func; + gp_program* program; }; struct fitness_t diff --git a/src/tree.cpp b/src/tree.cpp index e57a579..624a0b7 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -295,7 +295,7 @@ namespace blt::gp } } - tree_t::tree_t(gp_program& program): func(&program.get_eval_func()) + tree_t::tree_t(gp_program& program): func(&program.get_eval_func()), program(&program) { } From 47b3821b0e020e77d0fbbd5c1e1367c7e59b4898 Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Tue, 3 Sep 2024 20:34:45 -0400 Subject: [PATCH 5/7] reducing allocations --- CMakeLists.txt | 2 +- include/blt/gp/program.h | 19 +++++-------------- include/blt/gp/tree.h | 20 +++++++++++--------- src/program.cpp | 1 + src/transformers.cpp | 3 +-- src/tree.cpp | 2 +- 6 files changed, 20 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d3f8d6e..60783e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.1.44) +project(blt-gp VERSION 0.1.45) include(CTest) diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index 3e50669..ad2af8c 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -118,13 +118,16 @@ namespace blt::gp template program_operator_storage_t& build(Operators& ... operators) { - blt::size_t largest = 0; + blt::size_t largest_args = 0; + blt::size_t largest_returns = 0; blt::u32 largest_argc = 0; operator_metadata_t meta; ((meta = add_operator(operators), largest_argc = std::max(meta.argc.argc, largest_argc), - largest = std::max(std::max(meta.arg_size_bytes, meta.return_size_bytes), largest)), ...); + largest_args = std::max(meta.arg_size_bytes, largest_args), largest_returns = std::max(meta.return_size_bytes, + largest_returns)), ...); // largest = largest * largest_argc; + blt::size_t largest = largest_args * largest_argc * largest_returns * largest_argc; BLT_TRACE(largest); storage.eval_func = [&operators..., largest](const tree_t& tree, void* context) -> evaluation_context& { @@ -143,23 +146,11 @@ namespace blt::gp op_pos++; if (operation.is_value) { - auto cur = tracker.start_measurement(); total_so_far += stack_allocator::aligned_size(operation.type_size); results.values.copy_from(vals.from(total_so_far), stack_allocator::aligned_size(operation.type_size)); - tracker.stop_measurement(cur); - if (cur.getAllocatedByteDifference() > 0) - { - BLT_TRACE("Operator %ld allocated! pos: %ld", operation.id, op_pos); - } continue; } - auto cur = tracker.start_measurement(); call_jmp_table(operation.id, context, results.values, results.values, operators...); - tracker.stop_measurement(cur); - if (cur.getAllocatedByteDifference() > 0) - { - BLT_TRACE("Operator %ld allocated! pos: %ld", operation.id, op_pos); - } } return results; diff --git a/include/blt/gp/tree.h b/include/blt/gp/tree.h index 0272a9c..efda04b 100644 --- a/include/blt/gp/tree.h +++ b/include/blt/gp/tree.h @@ -118,14 +118,7 @@ namespace blt::gp evaluation_context& evaluate(void* context) const { - auto cur = tracker.start_measurement(); - auto& v = (*func)(*this, context); - tracker.stop_measurement(cur); - if (cur.getAllocatedByteDifference() > 0) - { - print(*program, std::cout, false, true, false); - } - return v; + return (*func)(*this, context); } blt::size_t get_depth(gp_program& program); @@ -201,7 +194,6 @@ namespace blt::gp tracked_vector operations; blt::gp::stack_allocator values; detail::eval_func_t* func; - gp_program* program; }; struct fitness_t @@ -255,6 +247,16 @@ namespace blt::gp normalized_fitness.push_back(v); } + population_stats(population_stats&& move) noexcept: + overall_fitness(move.overall_fitness.load()), average_fitness(move.average_fitness.load()), best_fitness(move.best_fitness.load()), + worst_fitness(move.worst_fitness.load()), normalized_fitness(std::move(move.normalized_fitness)) + { + move.overall_fitness = 0; + move.average_fitness = 0; + move.best_fitness = 0; + move.worst_fitness = 0; + } + std::atomic overall_fitness = 0; std::atomic average_fitness = 0; std::atomic best_fitness = 0; diff --git a/src/program.cpp b/src/program.cpp index 248d658..8481451 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -61,6 +61,7 @@ namespace blt::gp #ifdef BLT_TRACK_ALLOCATIONS tracker.reserve(); #endif + statistic_history.reserve(config.max_generations + 1); if (config.threads == 0) config.set_thread_count(std::thread::hardware_concurrency()); // main thread is thread0 diff --git a/src/transformers.cpp b/src/transformers.cpp index e2a0685..3f4e270 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -459,8 +459,7 @@ namespace blt::gp config.generator.get().generate(tree, {program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, config.replacement_max_depth}); - blt::size_t total_bytes_for = tree.total_value_bytes(); - vals.copy_from(tree.get_values(), total_bytes_for); + vals.insert(tree.get_values()); ops.insert(ops.begin() + static_cast(start_index), tree.get_operations().begin(), tree.get_operations().end()); start_index += tree.get_operations().size(); diff --git a/src/tree.cpp b/src/tree.cpp index 624a0b7..e57a579 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -295,7 +295,7 @@ namespace blt::gp } } - tree_t::tree_t(gp_program& program): func(&program.get_eval_func()), program(&program) + tree_t::tree_t(gp_program& program): func(&program.get_eval_func()) { } From 3a31b2794c1eca4b3d7b40aa513376010aa8f51b Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 3 Sep 2024 22:38:45 -0400 Subject: [PATCH 6/7] s --- include/blt/gp/fwdecl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/blt/gp/fwdecl.h b/include/blt/gp/fwdecl.h index 2169194..0a42f5c 100644 --- a/include/blt/gp/fwdecl.h +++ b/include/blt/gp/fwdecl.h @@ -94,7 +94,7 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS tracker.allocate(bytes); - std::cout << "Hey our aligned allocator allocated " << bytes << " bytes!\n"; +// std::cout << "Hey our aligned allocator allocated " << bytes << " bytes!\n"; #endif return std::aligned_alloc(8, bytes); } @@ -105,7 +105,7 @@ namespace blt::gp return; #ifdef BLT_TRACK_ALLOCATIONS tracker.deallocate(bytes); - std::cout << "[Hey our aligned allocator deallocated " << bytes << " bytes!]\n"; +// std::cout << "[Hey our aligned allocator deallocated " << bytes << " bytes!]\n"; #else (void) bytes; #endif From fc611f355564f9cd397ca3139419d9f3b1ed88b2 Mon Sep 17 00:00:00 2001 From: Brett Date: Tue, 3 Sep 2024 22:39:09 -0400 Subject: [PATCH 7/7] reset config --- examples/symbolic_regression.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp index ab87400..45b44a3 100644 --- a/examples/symbolic_regression.cpp +++ b/examples/symbolic_regression.cpp @@ -37,13 +37,13 @@ std::array training_cases; blt::gp::prog_config_t config = blt::gp::prog_config_t() .set_initial_min_tree_size(2) .set_initial_max_tree_size(6) - .set_elite_count(0) + .set_elite_count(200) .set_crossover_chance(0.9) .set_mutation_chance(0.1) .set_reproduction_chance(0) .set_max_generations(50) - .set_pop_size(1) - .set_thread_count(1); + .set_pop_size(20000) + .set_thread_count(0); blt::gp::gp_program program{SEED, config};