diff --git a/CMakeLists.txt b/CMakeLists.txt index e552fbb..60783e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.1.40) +project(blt-gp VERSION 0.1.45) include(CTest) @@ -16,7 +16,7 @@ set(CMAKE_CXX_STANDARD 17) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -#SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") +SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -g") if (NOT TARGET BLT) add_subdirectory(lib/blt) diff --git a/dhat.out.293761 b/dhat.out.293761 index 21dd56d..7579178 100644 --- a/dhat.out.293761 +++ b/dhat.out.293761 @@ -1240,9 +1240,9 @@ ,"0x122836: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12E6ED: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_id const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12284D: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" - ,"0x12EB76: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" + ,"0x12EB76: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info_t const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12294C: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" - ,"0x12E897: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" + ,"0x12E897: void std::vector >::_M_realloc_insert(__gnu_cxx::__normal_iterator > >, blt::gp::operator_info_t const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12F1FA: void std::vector, std::allocator > >::_M_realloc_insert >(__gnu_cxx::__normal_iterator*, std::vector, std::allocator > > >, std::function&&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12298F: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12F3EB: void std::vector > >, std::allocator > > > >::_M_realloc_insert > > >(__gnu_cxx::__normal_iterator > >*, std::vector > >, std::allocator > > > > >, std::optional > >&&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" @@ -1261,7 +1261,7 @@ ,"0x122F78: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x123052: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x12305D: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" - ,"0x12ED14: std::vector >::push_back(blt::gp::operator_info const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" + ,"0x12ED14: std::vector >::push_back(blt::gp::operator_info_t const&) (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x1231C6: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x123321: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" ,"0x123390: main (in /home/brett/Documents/code/c++/blt-gp/cmake-build-release/blt-SR-playground-example)" diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp index b63d37f..206bd8a 100644 --- a/examples/symbolic_regression.cpp +++ b/examples/symbolic_regression.cpp @@ -22,6 +22,7 @@ #include #include #include "operations_common.h" +#include "blt/math/averages.h" //static constexpr long SEED = 41912; static const unsigned long SEED = std::random_device()(); @@ -142,26 +143,43 @@ int main() BLT_PRINT_PROFILE("Symbolic Regression", blt::PRINT_CYCLES | blt::PRINT_THREAD | blt::PRINT_WALL); #ifdef BLT_TRACK_ALLOCATIONS - BLT_TRACE("Total Allocations: %ld times with a total of %s", blt::gp::tracker.getAllocations(), - blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("Total Allocations: %ld times with a total of %s, peak allocated bytes %s", blt::gp::tracker.getAllocations(), + blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str(), + blt::byte_convert_t(blt::gp::tracker.getPeakAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str()); + BLT_TRACE("------------------------------------------------------"); + auto evaluation_calls_v = blt::gp::evaluation_calls.get_calls(); + auto evaluation_allocations_v = blt::gp::evaluation_allocations.get_calls(); + BLT_TRACE("Total Evaluation Calls: %ld; Peak Bytes Allocated %s", evaluation_calls_v, + blt::string::bytes_to_pretty(blt::gp::evaluation_calls.get_value()).c_str()); + BLT_TRACE("Total Evaluation Allocations: %ld; Bytes %s; Average %s", evaluation_allocations_v, + blt::string::bytes_to_pretty(blt::gp::evaluation_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::evaluation_allocations.get_value(), evaluation_allocations_v)).c_str()); + BLT_TRACE("Percent Evaluation calls allocate? %lf%%", blt::average(evaluation_allocations_v, evaluation_calls_v) * 100); + BLT_TRACE("------------------------------------------------------"); auto crossover_calls_v = blt::gp::crossover_calls.get_calls(); auto crossover_allocations_v = blt::gp::crossover_allocations.get_calls(); auto mutation_calls_v = blt::gp::mutation_calls.get_calls(); auto mutation_allocations_v = blt::gp::mutation_allocations.get_calls(); auto reproduction_calls_v = blt::gp::reproduction_calls.get_calls(); auto reproduction_allocations_v = blt::gp::reproduction_allocations.get_calls(); - BLT_TRACE("Total Crossover Calls: %ld Bytes %s", crossover_calls_v, blt::byte_convert_t(blt::gp::crossover_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Mutation Calls: %ld Bytes %s", mutation_calls_v, blt::byte_convert_t(blt::gp::mutation_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Reproduction Calls: %ld Bytes %s", reproduction_calls_v, blt::byte_convert_t(blt::gp::reproduction_calls.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Crossover Allocations: %ld Bytes %s", crossover_allocations_v, blt::byte_convert_t(blt::gp::crossover_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Mutation Allocations: %ld Bytes %s", mutation_allocations_v, blt::byte_convert_t(blt::gp::mutation_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Total Reproduction Allocations: %ld Bytes %s", reproduction_allocations_v, blt::byte_convert_t(blt::gp::reproduction_allocations.get_value()).convert_to_nearest_type().to_pretty_string().c_str()); - BLT_TRACE("Percent Crossover calls allocate? %lf%%", - static_cast(crossover_allocations_v) / static_cast(crossover_calls_v == 0 ? 1 : crossover_calls_v) * 100); - BLT_TRACE("Percent Mutation calls allocate? %lf%%", - static_cast(mutation_allocations_v) / static_cast(mutation_calls_v == 0 ? 1 : mutation_calls_v) * 100); - BLT_TRACE("Percent Reproduction calls allocate? %lf%%", - static_cast(reproduction_allocations_v) / static_cast(reproduction_calls_v == 0 ? 1 : reproduction_calls_v) * 100); + BLT_TRACE("Total Crossover Calls: %ld; Peak Bytes Allocated %s", crossover_calls_v, + blt::string::bytes_to_pretty(blt::gp::crossover_calls.get_value()).c_str()); + BLT_TRACE("Total Mutation Calls: %ld; Peak Bytes Allocated %s", mutation_calls_v, + blt::string::bytes_to_pretty(blt::gp::mutation_calls.get_value()).c_str()); + BLT_TRACE("Total Reproduction Calls: %ld; Peak Bytes Allocated %s", reproduction_calls_v, + blt::string::bytes_to_pretty(blt::gp::reproduction_calls.get_value()).c_str()); + BLT_TRACE("Total Crossover Allocations: %ld; Bytes %s; Average %s", crossover_allocations_v, + blt::string::bytes_to_pretty(blt::gp::crossover_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::crossover_allocations.get_value(), crossover_allocations_v)).c_str()); + BLT_TRACE("Total Mutation Allocations: %ld; Bytes %s; Average %s", mutation_allocations_v, + blt::string::bytes_to_pretty(blt::gp::mutation_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::mutation_allocations.get_value(), mutation_allocations_v)).c_str()); + BLT_TRACE("Total Reproduction Allocations: %ld; Bytes %s; Average %s", reproduction_allocations_v, + blt::string::bytes_to_pretty(blt::gp::reproduction_allocations.get_value()).c_str(), + blt::string::bytes_to_pretty(blt::average(blt::gp::reproduction_allocations.get_value(), reproduction_allocations_v)).c_str()); + BLT_TRACE("Percent Crossover calls allocate? %lf%%", blt::average(crossover_allocations_v, crossover_calls_v) * 100); + BLT_TRACE("Percent Mutation calls allocate? %lf%%", blt::average(mutation_allocations_v, mutation_calls_v) * 100); + BLT_TRACE("Percent Reproduction calls allocate? %lf%%", blt::average(reproduction_allocations_v, reproduction_calls_v) * 100); #endif return 0; diff --git a/include/blt/gp/fwdecl.h b/include/blt/gp/fwdecl.h index 5e751e2..0a42f5c 100644 --- a/include/blt/gp/fwdecl.h +++ b/include/blt/gp/fwdecl.h @@ -30,12 +30,18 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS inline allocation_tracker_t tracker; + + // population gen specifics inline call_tracker_t crossover_calls; inline call_tracker_t mutation_calls; inline call_tracker_t reproduction_calls; inline call_tracker_t crossover_allocations; inline call_tracker_t mutation_allocations; inline call_tracker_t reproduction_allocations; + + // for evaluating fitness + inline call_tracker_t evaluation_calls; + inline call_tracker_t evaluation_allocations; #endif class gp_program; @@ -76,7 +82,7 @@ namespace blt::gp template using tracked_vector = std::vector; #endif - + // using operation_vector_t = tracked_vector; // using individual_vector_t = tracked_vector>; // using tree_vector_t = tracked_vector; @@ -88,6 +94,7 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS tracker.allocate(bytes); +// std::cout << "Hey our aligned allocator allocated " << bytes << " bytes!\n"; #endif return std::aligned_alloc(8, bytes); } @@ -98,6 +105,7 @@ namespace blt::gp return; #ifdef BLT_TRACK_ALLOCATIONS tracker.deallocate(bytes); +// std::cout << "[Hey our aligned allocator deallocated " << bytes << " bytes!]\n"; #else (void) bytes; #endif @@ -128,6 +136,7 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS tracker.allocate(n * sizeof(T)); +// std::cout << "Hey our tracked allocator allocated " << (n * sizeof(T)) << " bytes!\n"; #endif return static_cast(std::malloc(n * sizeof(T))); } @@ -141,6 +150,7 @@ namespace blt::gp { #ifdef BLT_TRACK_ALLOCATIONS tracker.deallocate(n * sizeof(T)); +// std::cout << "[Hey our tracked allocator deallocated " << (n * sizeof(T)) << " bytes!]\n"; #else (void) n; #endif diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index 8c3c1e3..ad2af8c 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -67,7 +67,7 @@ namespace blt::gp } }; - struct operator_info + struct operator_info_t { // types of the arguments tracked_vector argument_types; @@ -79,6 +79,13 @@ namespace blt::gp detail::operator_func_t func; }; + struct operator_metadata_t + { + blt::size_t arg_size_bytes = 0; + blt::size_t return_size_bytes = 0; + argc_t argc{}; + }; + struct program_operator_storage_t { // indexed from return TYPE ID, returns index of operator @@ -87,7 +94,8 @@ namespace blt::gp blt::expanding_buffer>> operators_ordered_terminals; // indexed from OPERATOR ID (operator number) blt::hashset_t ephemeral_leaf_operators; - tracked_vector operators; + tracked_vector operators; + tracked_vector operator_metadata; tracked_vector print_funcs; tracked_vector destroy_funcs; tracked_vector> names; @@ -110,11 +118,17 @@ namespace blt::gp template program_operator_storage_t& build(Operators& ... operators) { - tracked_vector sizes; - (sizes.push_back(add_operator(operators)), ...); - blt::size_t largest = 0; - for (auto v : sizes) - largest = std::max(v, largest); + blt::size_t largest_args = 0; + blt::size_t largest_returns = 0; + blt::u32 largest_argc = 0; + operator_metadata_t meta; + ((meta = add_operator(operators), largest_argc = std::max(meta.argc.argc, largest_argc), + largest_args = std::max(meta.arg_size_bytes, largest_args), largest_returns = std::max(meta.return_size_bytes, + largest_returns)), ...); + +// largest = largest * largest_argc; + blt::size_t largest = largest_args * largest_argc * largest_returns * largest_argc; + BLT_TRACE(largest); storage.eval_func = [&operators..., largest](const tree_t& tree, void* context) -> evaluation_context& { const auto& ops = tree.get_operations(); @@ -125,9 +139,11 @@ namespace blt::gp results.values.reserve(largest); blt::size_t total_so_far = 0; + blt::size_t op_pos = 0; for (const auto& operation : blt::reverse_iterate(ops.begin(), ops.end())) { + op_pos++; if (operation.is_value) { total_so_far += stack_allocator::aligned_size(operation.type_size); @@ -214,14 +230,11 @@ namespace blt::gp (storage.system.register_type(), ...); storage.system.register_type(); - auto total_size_required = stack_allocator::aligned_size(sizeof(Return)); - ((total_size_required += stack_allocator::aligned_size(sizeof(Args))), ...); - auto return_type_id = storage.system.get_type().id(); auto operator_id = blt::gp::operator_id(storage.operators.size()); op.id = operator_id; - operator_info info; + operator_info_t info; if constexpr (sizeof...(Args) > 0) { @@ -240,6 +253,16 @@ namespace blt::gp BLT_ASSERT(info.argc.argc_context - info.argc.argc <= 1 && "Cannot pass multiple context as arguments!"); storage.operators.push_back(info); + + operator_metadata_t meta; + if constexpr (sizeof...(Args) != 0) + { + meta.arg_size_bytes = (stack_allocator::aligned_size(sizeof(Args)) + ...); + } + meta.return_size_bytes = sizeof(Return); + meta.argc = info.argc; + + storage.operator_metadata.push_back(meta); storage.print_funcs.push_back([&op](std::ostream& out, stack_allocator& stack) { if constexpr (blt::meta::is_streamable_v) { @@ -267,11 +290,11 @@ namespace blt::gp storage.names.push_back(op.get_name()); if (op.is_ephemeral()) storage.ephemeral_leaf_operators.insert(operator_id); - return total_size_required * 2; + return meta; } template - void add_non_context_argument(decltype(operator_info::argument_types)& types) + void add_non_context_argument(decltype(operator_info_t::argument_types)& types) { if constexpr (!std::is_same_v>) { @@ -367,8 +390,7 @@ namespace blt::gp (*thread_execution_service)(0); #ifdef BLT_TRACK_ALLOCATIONS blt::gp::tracker.stop_measurement(gen_alloc); - BLT_TRACE("Generation Allocated %ld times with a total of %s", gen_alloc.getAllocationDifference(), - blt::byte_convert_t(gen_alloc.getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str()); + gen_alloc.pretty_print("Generation"); #endif } @@ -386,8 +408,13 @@ namespace blt::gp evaluate_fitness_internal(); #ifdef BLT_TRACK_ALLOCATIONS blt::gp::tracker.stop_measurement(fitness_alloc); - BLT_TRACE("Fitness Allocated %ld times with a total of %s", fitness_alloc.getAllocationDifference(), - blt::byte_convert_t(fitness_alloc.getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str()); + fitness_alloc.pretty_print("Fitness"); + evaluation_calls.call(); + evaluation_calls.set_value(std::max(evaluation_calls.get_value(), fitness_alloc.getAllocatedByteDifference())); + if (fitness_alloc.getAllocatedByteDifference() > 0) + { + evaluation_allocations.call(fitness_alloc.getAllocatedByteDifference()); + } #endif } @@ -477,9 +504,7 @@ namespace blt::gp mutation_selection.pre_process(*this, current_pop); reproduction_selection.pre_process(*this, current_pop); - perform_elitism(args, next_pop); - - blt::size_t start = config.elites; + blt::size_t start = perform_elitism(args, next_pop); while (start < config.population_size) { @@ -568,10 +593,8 @@ namespace blt::gp mutation_selection.pre_process(*this, current_pop); if (&crossover_selection != &reproduction_selection) reproduction_selection.pre_process(*this, current_pop); - - perform_elitism(args, next_pop); - - thread_helper.next_gen_left -= config.elites; + auto elite_amount = perform_elitism(args, next_pop); + thread_helper.next_gen_left -= elite_amount; } thread_helper.barrier.wait(); @@ -655,7 +678,7 @@ namespace blt::gp return storage.system; } - [[nodiscard]] inline operator_info& get_operator_info(operator_id id) + [[nodiscard]] inline operator_info_t& get_operator_info(operator_id id) { return storage.operators[id]; } @@ -725,8 +748,10 @@ namespace blt::gp return a.second > b.second; }); - for (blt::size_t i = 0; i < size; i++) + for (blt::size_t i = 0; i < std::min(size, config.population_size); i++) arr[i] = values[i].first; + for (blt::size_t i = std::min(size, config.population_size); i < size; i++) + arr[i] = 0; return arr; } @@ -793,7 +818,7 @@ namespace blt::gp struct concurrency_storage { - tracked_vector> threads; + std::vector> threads; std::mutex thread_function_control{}; std::condition_variable thread_function_condition{}; diff --git a/include/blt/gp/selection.h b/include/blt/gp/selection.h index 958a676..7c4c67b 100644 --- a/include/blt/gp/selection.h +++ b/include/blt/gp/selection.h @@ -41,7 +41,7 @@ namespace blt::gp constexpr inline auto perform_elitism = [](const selector_args& args, population_t& next_pop) { auto& [program, current_pop, current_stats, config, random] = args; - if (config.elites > 0) + if (config.elites > 0 && current_pop.get_individuals().size() >= config.elites) { static thread_local tracked_vector> values; values.clear(); @@ -70,7 +70,9 @@ namespace blt::gp for (blt::size_t i = 0; i < config.elites; i++) next_pop.get_individuals()[i].copy_fast(current_pop.get_individuals()[values[i].first].tree); + return config.elites; } + return 0ul; }; template @@ -89,7 +91,7 @@ namespace blt::gp if (random.choice(config.crossover_chance)) { #ifdef BLT_TRACK_ALLOCATIONS - auto state = tracker.start_measurement(); + auto state = tracker.start_measurement_thread_local(); #endif // crossover const tree_t* p1; @@ -100,10 +102,13 @@ namespace blt::gp p2 = &crossover_selection.select(program, current_pop); } while (!config.crossover.get().apply(program, *p1, *p2, c1, *c2)); #ifdef BLT_TRACK_ALLOCATIONS - tracker.stop_measurement(state); - crossover_calls.call(state.getAllocatedByteDifference()); - if (state.getAllocationDifference() != 0) + tracker.stop_measurement_thread_local(state); + crossover_calls.call(); + crossover_calls.set_value(std::max(crossover_calls.get_value(), state.getAllocatedByteDifference())); + if (state.getAllocatedByteDifference() != 0) + { crossover_allocations.call(state.getAllocatedByteDifference()); + } #endif return 2; } @@ -112,7 +117,7 @@ namespace blt::gp if (random.choice(config.mutation_chance)) { #ifdef BLT_TRACK_ALLOCATIONS - auto state = tracker.start_measurement(); + auto state = tracker.start_measurement_thread_local(); #endif // mutation const tree_t* p; @@ -121,8 +126,9 @@ namespace blt::gp p = &mutation_selection.select(program, current_pop); } while (!config.mutator.get().apply(program, *p, c1)); #ifdef BLT_TRACK_ALLOCATIONS - tracker.stop_measurement(state); - mutation_calls.call(state.getAllocatedByteDifference()); + tracker.stop_measurement_thread_local(state); + mutation_calls.call(); + mutation_calls.set_value(std::max(mutation_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocationDifference() != 0) { mutation_allocations.call(state.getAllocatedByteDifference()); @@ -135,13 +141,14 @@ namespace blt::gp if (config.reproduction_chance > 0 && random.choice(config.reproduction_chance)) { #ifdef BLT_TRACK_ALLOCATIONS - auto state = tracker.start_measurement(); + auto state = tracker.start_measurement_thread_local(); #endif // reproduction c1 = reproduction_selection.select(program, current_pop); #ifdef BLT_TRACK_ALLOCATIONS - tracker.stop_measurement(state); - reproduction_calls.call(state.getAllocatedByteDifference()); + tracker.stop_measurement_thread_local(state); + reproduction_calls.call(); + reproduction_calls.set_value(std::max(reproduction_calls.get_value(), state.getAllocatedByteDifference())); if (state.getAllocationDifference() != 0) { reproduction_allocations.call(state.getAllocatedByteDifference()); diff --git a/include/blt/gp/stack.h b/include/blt/gp/stack.h index 6ee7e0a..4a883ab 100644 --- a/include/blt/gp/stack.h +++ b/include/blt/gp/stack.h @@ -115,7 +115,7 @@ namespace blt::gp { if (stack.empty()) return; - if (size_ < stack.bytes_stored + bytes_stored) + if (stack.bytes_stored + bytes_stored > size_) expand(stack.bytes_stored + size_); std::memcpy(data_ + bytes_stored, stack.data_, stack.bytes_stored); bytes_stored += stack.bytes_stored; @@ -125,7 +125,7 @@ namespace blt::gp { if (bytes == 0) return; - if (size_ < bytes + bytes_stored) + if (bytes + bytes_stored > size_) expand(bytes + size_); std::memcpy(data_ + bytes_stored, stack.data_ + (stack.bytes_stored - bytes), bytes); bytes_stored += bytes; @@ -135,7 +135,7 @@ namespace blt::gp { if (bytes == 0 || data == nullptr) return; - if (size_ < bytes + bytes_stored) + if (bytes + bytes_stored > size_) expand(bytes + size_); std::memcpy(data_ + bytes_stored, data, bytes); bytes_stored += bytes; diff --git a/include/blt/gp/stats.h b/include/blt/gp/stats.h index 9fda502..5d87e02 100644 --- a/include/blt/gp/stats.h +++ b/include/blt/gp/stats.h @@ -20,15 +20,64 @@ #define BLT_GP_STATS_H #include -#include +#include +#include +#include #include +#include +#include +#include +#include namespace blt::gp { - class allocation_tracker_t { public: + class tl_t + { + friend allocation_tracker_t; + public: + [[nodiscard]] blt::u64 getAllocations() const + { + return get_map(allocations); + } + + [[nodiscard]] blt::u64 getDeallocations() const + { + return get_map(deallocations); + } + + [[nodiscard]] blt::u64 getAllocatedBytes() const + { + return get_map(allocated_bytes); + } + + [[nodiscard]] blt::u64 getDeallocatedBytes() const + { + return get_map(deallocated_bytes); + } + + [[nodiscard]] blt::u64 getAllocationDifference() const + { + return std::abs(static_cast(getAllocations()) - static_cast(getDeallocations())); + } + + [[nodiscard]] blt::u64 getCurrentlyAllocatedBytes() const + { + return getAllocatedBytes() - getDeallocatedBytes(); + } + + private: + blt::hashmap_t> allocations; + blt::hashmap_t> deallocations; + blt::hashmap_t> allocated_bytes; + blt::hashmap_t> deallocated_bytes; + + std::mutex mutex; + std::condition_variable var; + }; + struct allocation_data_t { blt::u64 start_allocations = 0; @@ -60,18 +109,55 @@ namespace blt::gp { return end_deallocated_bytes - start_deallocated_bytes; } + + void pretty_print(const std::string& name) const; }; + void reserve() + { + { + std::scoped_lock lock(tl.mutex); + tl.allocations.insert({std::this_thread::get_id(), std::make_unique()}); + tl.deallocations.insert({std::this_thread::get_id(), std::make_unique()}); + tl.allocated_bytes.insert({std::this_thread::get_id(), std::make_unique()}); + tl.deallocated_bytes.insert({std::this_thread::get_id(), std::make_unique()}); + } + tl.var.notify_all(); + } + + blt::size_t reserved_threads() + { + return tl.allocations.size(); + } + + void await_thread_loading_complete(blt::u64 required_threads) + { + std::unique_lock lock(tl.mutex); + tl.var.wait(lock, [this, required_threads]() { + return reserved_threads() == required_threads; + }); + } + void allocate(blt::size_t bytes) { allocations++; allocated_bytes += bytes; + + auto diff = getCurrentlyAllocatedBytes(); + auto atomic_val = peak_allocated_bytes.load(std::memory_order_relaxed); + while (diff > atomic_val && + !peak_allocated_bytes.compare_exchange_weak(atomic_val, diff, std::memory_order_relaxed, std::memory_order_relaxed)); + + add_map(tl.allocations, 1); + add_map(tl.allocated_bytes, bytes); } void deallocate(blt::size_t bytes) { deallocations++; deallocated_bytes += bytes; + add_map(tl.deallocations, 1); + add_map(tl.deallocated_bytes, bytes); } [[nodiscard]] blt::u64 getAllocations() const @@ -104,29 +190,78 @@ namespace blt::gp return getAllocatedBytes() - getDeallocatedBytes(); } + [[nodiscard]] blt::u64 getPeakAllocatedBytes() const + { + return peak_allocated_bytes; + } + + allocation_tracker_t::tl_t& get_thread_local() + { + return tl; + } + [[nodiscard]] allocation_data_t start_measurement() const { allocation_data_t data{}; - data.start_allocations = allocations; - data.start_deallocations = deallocations; - data.start_allocated_bytes = allocated_bytes; - data.start_deallocated_bytes = deallocated_bytes; + data.start_allocations = getAllocations(); + data.start_deallocations = getDeallocations(); + data.start_allocated_bytes = getAllocatedBytes(); + data.start_deallocated_bytes = getDeallocatedBytes(); + return data; + } + + [[nodiscard]] allocation_data_t start_measurement_thread_local() const + { + allocation_data_t data{}; + data.start_allocations = tl.getAllocations(); + data.start_deallocations = tl.getDeallocations(); + data.start_allocated_bytes = tl.getAllocatedBytes(); + data.start_deallocated_bytes = tl.getDeallocatedBytes(); return data; } void stop_measurement(allocation_data_t& data) const { - data.end_allocations = allocations; - data.end_deallocations = deallocations; - data.end_allocated_bytes = allocated_bytes; - data.end_deallocated_bytes = deallocated_bytes; + data.end_allocations = getAllocations(); + data.end_deallocations = getDeallocations(); + data.end_allocated_bytes = getAllocatedBytes(); + data.end_deallocated_bytes = getDeallocatedBytes(); + } + + void stop_measurement_thread_local(allocation_data_t& data) const + { + data.end_allocations = tl.getAllocations(); + data.end_deallocations = tl.getDeallocations(); + data.end_allocated_bytes = tl.getAllocatedBytes(); + data.end_deallocated_bytes = tl.getDeallocatedBytes(); } private: + static void add_map(blt::hashmap_t>& map, blt::u64 value) + { + auto l = map.find(std::this_thread::get_id()); + if (l == map.end()) + BLT_ABORT("Thread doesn't exist inside this map!"); + auto& v = *l->second; + v += value; + } + + static blt::u64 get_map(const blt::hashmap_t>& map) + { + auto l = map.find(std::this_thread::get_id()); + if (l == map.end()) + BLT_ABORT("Thread doesn't exist inside this map!"); + return *l->second; + } + + tl_t tl; + std::atomic_uint64_t allocations = 0; std::atomic_uint64_t deallocations = 0; std::atomic_uint64_t allocated_bytes = 0; std::atomic_uint64_t deallocated_bytes = 0; + + std::atomic_uint64_t peak_allocated_bytes = 0; }; class call_tracker_t @@ -155,6 +290,11 @@ namespace blt::gp secondary_value += value; } + void set_value(blt::u64 value) + { + secondary_value = value; + } + void call() { primary_calls++; @@ -190,7 +330,6 @@ namespace blt::gp std::atomic_uint64_t primary_calls = 0; std::atomic_uint64_t secondary_value = 0; }; - } #endif //BLT_GP_STATS_H diff --git a/include/blt/gp/tree.h b/include/blt/gp/tree.h index f25ba60..efda04b 100644 --- a/include/blt/gp/tree.h +++ b/include/blt/gp/tree.h @@ -247,6 +247,16 @@ namespace blt::gp normalized_fitness.push_back(v); } + population_stats(population_stats&& move) noexcept: + overall_fitness(move.overall_fitness.load()), average_fitness(move.average_fitness.load()), best_fitness(move.best_fitness.load()), + worst_fitness(move.worst_fitness.load()), normalized_fitness(std::move(move.normalized_fitness)) + { + move.overall_fitness = 0; + move.average_fitness = 0; + move.best_fitness = 0; + move.worst_fitness = 0; + } + std::atomic overall_fitness = 0; std::atomic average_fitness = 0; std::atomic best_fitness = 0; diff --git a/lib/blt b/lib/blt index ab482f1..a7645d9 160000 --- a/lib/blt +++ b/lib/blt @@ -1 +1 @@ -Subproject commit ab482f1a1c5782bd3501428f26c02f0bb4729946 +Subproject commit a7645d9ddec57ecaad525b48a30f8001adcf75e8 diff --git a/src/program.cpp b/src/program.cpp index 1f310d3..8481451 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -58,12 +58,20 @@ namespace blt::gp void gp_program::create_threads() { +#ifdef BLT_TRACK_ALLOCATIONS + tracker.reserve(); +#endif + statistic_history.reserve(config.max_generations + 1); if (config.threads == 0) config.set_thread_count(std::thread::hardware_concurrency()); // main thread is thread0 for (blt::size_t i = 1; i < config.threads; i++) { thread_helper.threads.emplace_back(new std::thread([i, this]() { +#ifdef BLT_TRACK_ALLOCATIONS + tracker.reserve(); + tracker.await_thread_loading_complete(config.threads); +#endif std::function* execution_function = nullptr; while (!should_thread_terminate()) { @@ -83,5 +91,8 @@ namespace blt::gp } })); } +#ifdef BLT_TRACK_ALLOCATIONS + tracker.await_thread_loading_complete(config.threads); +#endif } } \ No newline at end of file diff --git a/src/stats.cpp b/src/stats.cpp index d5be6a7..2ee9322 100644 --- a/src/stats.cpp +++ b/src/stats.cpp @@ -17,8 +17,14 @@ */ #include #include +#include "blt/std/format.h" namespace blt::gp { - + + void allocation_tracker_t::allocation_data_t::pretty_print(const std::string& name) const + { + BLT_TRACE("%s Allocations: %ld times with a total of %s", name.c_str(), getAllocationDifference(), + blt::byte_convert_t(getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str()); + } } \ No newline at end of file diff --git a/src/transformers.cpp b/src/transformers.cpp index 5dc65ea..3f4e270 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -105,6 +105,9 @@ namespace blt::gp auto copy_ptr_c1 = get_thread_pointer_for_size(c1_total); auto copy_ptr_c2 = get_thread_pointer_for_size(c2_total); + c1_stack.reserve(c1_stack.bytes_in_head() - c1_stack_for_bytes + c2_stack_for_bytes); + c2_stack.reserve(c2_stack.bytes_in_head() - c2_stack_for_bytes + c1_stack_for_bytes); + c1_stack.copy_to(copy_ptr_c1, c1_total); c1_stack.pop_bytes(c1_total); @@ -168,7 +171,7 @@ namespace blt::gp blt::size_t attempted_point = 0; const auto& crossover_point_type = program.get_operator_info(c1_ops[crossover_point].id); - operator_info* attempted_point_type = nullptr; + operator_info_t* attempted_point_type = nullptr; blt::size_t counter = 0; do @@ -456,8 +459,7 @@ namespace blt::gp config.generator.get().generate(tree, {program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, config.replacement_max_depth}); - blt::size_t total_bytes_for = tree.total_value_bytes(); - vals.copy_from(tree.get_values(), total_bytes_for); + vals.insert(tree.get_values()); ops.insert(ops.begin() + static_cast(start_index), tree.get_operations().begin(), tree.get_operations().end()); start_index += tree.get_operations().size();