From a58fe64c0e74c29639c178d90a9bfd3b830b4202 Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Tue, 21 Jan 2025 21:20:16 -0500 Subject: [PATCH] fix copy op, change selection behaviour --- CMakeLists.txt | 2 +- examples/src/rice_classification.cpp | 2 +- include/blt/gp/program.h | 29 ++- include/blt/gp/transformers.h | 270 ++++++++++++++------------- lib/blt | 2 +- src/transformers.cpp | 142 ++++++-------- src/tree.cpp | 1 - 7 files changed, 210 insertions(+), 238 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 58ec75b..d7999b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ macro(compile_options target_name) sanitizers(${target_name}) endmacro() -project(blt-gp VERSION 0.3.30) +project(blt-gp VERSION 0.3.31) include(CTest) diff --git a/examples/src/rice_classification.cpp b/examples/src/rice_classification.cpp index 86e29cf..8f8b95b 100644 --- a/examples/src/rice_classification.cpp +++ b/examples/src/rice_classification.cpp @@ -35,7 +35,7 @@ blt::gp::prog_config_t config = blt::gp::prog_config_t() .set_initial_min_tree_size(2) .set_initial_max_tree_size(6) .set_elite_count(2) - .set_crossover_chance(0.9) + .set_crossover_chance(0.8) .set_mutation_chance(0.1) .set_reproduction_chance(0) .set_max_generations(50) diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index 84092f8..3a699e8 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -568,9 +568,9 @@ namespace blt::gp while (thread_helper.next_gen_left > 0) { - blt::size_t size = 0; - blt::size_t begin = 0; - blt::size_t end = thread_helper.next_gen_left.load(std::memory_order_relaxed); + size_t size = 0; + size_t begin = 0; + size_t end = thread_helper.next_gen_left.load(std::memory_order_relaxed); do { size = std::min(end, config.evaluation_size); @@ -766,26 +766,24 @@ namespace blt::gp { if (get_random().choice(selection_probabilities.crossover_chance)) { - // if (c2 == nullptr) - // return 0; - // auto ptr = c2; - // if (ptr == nullptr) - // ptr = &tree_t::get_thread_local(*this); + auto ptr = c2; + if (ptr == nullptr) + ptr = &tree_t::get_thread_local(*this); #ifdef BLT_TRACK_ALLOCATIONS auto state = tracker.start_measurement_thread_local(); #endif const tree_t* p1; const tree_t* p2; size_t runs = 0; - tree_t tree{*this}; do { // BLT_TRACE("%lu %p %p", runs, &c1, &tree); p1 = &crossover.select(*this, current_pop); p2 = &crossover.select(*this, current_pop); + // BLT_TRACE("%p %p || %lu", p1, p2, current_pop.get_individuals().size()); c1.copy_fast(*p1); - tree.copy_fast(*p2); + ptr->copy_fast(*p2); // ptr->copy_fast(*p2); if (++runs >= config.crossover.get().get_config().max_crossover_iterations) @@ -794,7 +792,7 @@ namespace blt::gp crossover_calls.value(1); #endif } - while (!config.crossover.get().apply(*this, *p1, *p2, c1, tree)); + while (!config.crossover.get().apply(*this, *p1, *p2, c1, *ptr)); #ifdef BLT_TRACK_ALLOCATIONS tracker.stop_measurement_thread_local(state); crossover_calls.call(); @@ -804,10 +802,11 @@ namespace blt::gp crossover_allocations.set_value(std::max(crossover_allocations.get_value(), state.getAllocatedByteDifference())); } #endif - // if (c2 == nullptr) - // tree_t::get_thread_local(*this); - if (c2 != nullptr) - *c2 = tree; + if (c2 == nullptr) + { + tree_t::get_thread_local(*this).clear(*this); + return 1; + } return 2; } if (get_random().choice(selection_probabilities.mutation_chance)) diff --git a/include/blt/gp/transformers.h b/include/blt/gp/transformers.h index 3287db5..2da1aed 100644 --- a/include/blt/gp/transformers.h +++ b/include/blt/gp/transformers.h @@ -30,16 +30,16 @@ namespace blt::gp { namespace detail { - template + template inline static constexpr double sum(const T& array) { double init = 0.0; - for (double i : array) + for (const double i : array) init += i; return init; } - - template + + template static constexpr std::array aggregate_array(Args... list) { std::array data{list...}; @@ -54,145 +54,151 @@ namespace blt::gp return data; } } - + class crossover_t { - public: - struct point_info_t - { - ptrdiff_t point; - operator_info_t& type_operator_info; - }; - struct crossover_point_t - { - tree_t::subtree_point_t p1_crossover_point; - tree_t::subtree_point_t p2_crossover_point; - }; - struct config_t - { - // number of times crossover will try to pick a valid point in the tree. this is purely based on the return type of the operators - u32 max_crossover_tries = 5; - // how many times the crossover function can fail before we will skip this operation. - u32 max_crossover_iterations = 10; - // if tree have fewer nodes than this number, they will not be considered for crossover - // should be at least 5 as crossover will not select the root node. - u32 min_tree_size = 5; - // used by the traverse version of get_crossover_point - // at each depth level, what chance do we have to exit with this as our point? or in other words what's the chance we continue traversing - // this is what this option configures. - f32 depth_multiplier = 0.5; - // how often should we select terminals over functions. By default, we only allow selection of terminals 10% of the time - // this applies to both types of crossover point functions. Traversal will use the parent if it should not pick a terminal. - f32 terminal_chance = 0.1; - // use traversal to select point instead of random selection - bool traverse = false; - }; - - crossover_t() = default; - - explicit crossover_t(const config_t& config): config(config) - {} + public: + struct point_info_t + { + ptrdiff_t point; + operator_info_t& type_operator_info; + }; - [[nodiscard]] const config_t& get_config() const - { - return config; - } - - std::optional get_crossover_point(const tree_t& c1, const tree_t& c2) const; - - std::optional get_crossover_point_traverse(const tree_t& c1, const tree_t& c2) const; - - /** - * child1 and child2 are copies of the parents, the result of selecting a crossover point and performing standard subtree crossover. - * the parents are not modified during this process - * @param program reference to the global program container responsible for managing these trees - * @param p1 reference to the first parent - * @param p2 reference to the second parent - * @return expected pair of child otherwise returns error enum - */ - virtual bool apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2); // NOLINT - - virtual ~crossover_t() = default; - - protected: - [[nodiscard]] std::optional get_point_traverse_retry(const tree_t& t, std::optional type) const; - - config_t config; + struct crossover_point_t + { + tree_t::subtree_point_t p1_crossover_point; + tree_t::subtree_point_t p2_crossover_point; + }; + + struct config_t + { + // number of times crossover will try to pick a valid point in the tree. this is purely based on the return type of the operators + u32 max_crossover_tries = 5; + // how many times the crossover function can fail before we will skip this operation. + u32 max_crossover_iterations = 10; + // if tree have fewer nodes than this number, they will not be considered for crossover + // should be at least 5 as crossover will not select the root node. + u32 min_tree_size = 5; + // used by the traverse version of get_crossover_point + // at each depth level, what chance do we have to exit with this as our point? or in other words what's the chance we continue traversing + // this is what this option configures. + f32 depth_multiplier = 0.5; + // how often should we select terminals over functions. By default, we only allow selection of terminals 10% of the time + // this applies to both types of crossover point functions. Traversal will use the parent if it should not pick a terminal. + f32 terminal_chance = 0.1; + // use traversal to select point instead of random selection + bool traverse = false; + }; + + crossover_t() = default; + + explicit crossover_t(const config_t& config): config(config) + { + } + + [[nodiscard]] const config_t& get_config() const + { + return config; + } + + std::optional get_crossover_point(const tree_t& c1, const tree_t& c2) const; + + std::optional get_crossover_point_traverse(const tree_t& c1, const tree_t& c2) const; + + /** + * child1 and child2 are copies of the parents, the result of selecting a crossover point and performing standard subtree crossover. + * the parents are not modified during this process + * @param program reference to the global program container responsible for managing these trees + * @param p1 reference to the first parent + * @param p2 reference to the second parent + * @return expected pair of child otherwise returns error enum + */ + virtual bool apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2); // NOLINT + + virtual ~crossover_t() = default; + + protected: + [[nodiscard]] std::optional get_point_traverse_retry(const tree_t& t, std::optional type) const; + + config_t config; }; - + class mutation_t { - public: - struct config_t + public: + struct config_t + { + blt::size_t replacement_min_depth = 2; + blt::size_t replacement_max_depth = 6; + + std::reference_wrapper generator; + + config_t(tree_generator_t& generator): generator(generator) // NOLINT { - blt::size_t replacement_min_depth = 2; - blt::size_t replacement_max_depth = 6; - - std::reference_wrapper generator; - - config_t(tree_generator_t& generator): generator(generator) // NOLINT - {} - - config_t(); - }; - - mutation_t() = default; - - explicit mutation_t(const config_t& config): config(config) - {} - - virtual bool apply(gp_program& program, const tree_t& p, tree_t& c); - - // returns the point after the mutation - size_t mutate_point(gp_program& program, tree_t& c, tree_t::subtree_point_t node) const; - - virtual ~mutation_t() = default; - - protected: - config_t config; + } + + config_t(); + }; + + mutation_t() = default; + + explicit mutation_t(const config_t& config): config(config) + { + } + + virtual bool apply(gp_program& program, const tree_t& p, tree_t& c); + + // returns the point after the mutation + size_t mutate_point(gp_program& program, tree_t& c, tree_t::subtree_point_t node) const; + + virtual ~mutation_t() = default; + + protected: + config_t config; }; - + class advanced_mutation_t : public mutation_t { - public: - enum class mutation_operator : blt::i32 - { - EXPRESSION, // Generate a new random expression - ADJUST, // adjust the value of the type. (if it is a function it will mutate it to a different one) - SUB_FUNC, // subexpression becomes argument to new random function. Other args are generated. - JUMP_FUNC, // subexpression becomes this new node. Other arguments discarded. - COPY, // node can become copy of another subexpression. - END, // helper - }; - - advanced_mutation_t() = default; - - explicit advanced_mutation_t(const config_t& config): mutation_t(config) - {} - - bool apply(gp_program& program, const tree_t& p, tree_t& c) final; - - advanced_mutation_t& set_per_node_mutation_chance(double v) - { - per_node_mutation_chance = v; - return *this; - } - - private: - static constexpr auto operators_size = static_cast(mutation_operator::END); - private: - // this value is adjusted inversely to the size of the tree. - double per_node_mutation_chance = 5.0; - - static constexpr std::array mutation_operator_chances = detail::aggregate_array( - 0.25, // EXPRESSION - 0.15, // ADJUST - 0.01, // SUB_FUNC - 0.01, // JUMP_FUNC - 0.05 // COPY - ); + public: + enum class mutation_operator : i32 + { + EXPRESSION, // Generate a new random expression + ADJUST, // adjust the value of the type. (if it is a function it will mutate it to a different one) + SUB_FUNC, // subexpression becomes argument to new random function. Other args are generated. + JUMP_FUNC, // subexpression becomes this new node. Other arguments discarded. + COPY, // node can become copy of another subexpression. + END, // helper + }; + + advanced_mutation_t() = default; + + explicit advanced_mutation_t(const config_t& config): mutation_t(config) + { + } + + bool apply(gp_program& program, const tree_t& p, tree_t& c) final; + + advanced_mutation_t& set_per_node_mutation_chance(double v) + { + per_node_mutation_chance = v; + return *this; + } + + private: + static constexpr auto operators_size = static_cast(mutation_operator::END); + + private: + // this value is adjusted inversely to the size of the tree. + double per_node_mutation_chance = 5.0; + + static constexpr std::array mutation_operator_chances = detail::aggregate_array( + 0.25, // EXPRESSION + 0.20, // ADJUST + 0.05, // SUB_FUNC + 0.15, // JUMP_FUNC + 0.10 // COPY + ); }; - } #endif //BLT_GP_TRANSFORMERS_H diff --git a/lib/blt b/lib/blt index 74c1010..baa5952 160000 --- a/lib/blt +++ b/lib/blt @@ -1 +1 @@ -Subproject commit 74c1010118c3ae13f27499f564ce477b23ae0b0a +Subproject commit baa5952666594ce0d07a2b013e46c4bc343ba164 diff --git a/src/transformers.cpp b/src/transformers.cpp index 88b2e9c..511ad74 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -26,7 +26,7 @@ namespace blt::gp { -#if BLT_DEBUG_LEVEL >= 2 +#if BLT_DEBUG_LEVEL >= 2 || defined(BLT_TRACK_ALLOCATIONS) std::atomic_uint64_t mutate_point_counter = 0; std::atomic_uint64_t mutate_expression_counter = 0; std::atomic_uint64_t mutate_adjust_counter = 0; @@ -36,35 +36,31 @@ namespace blt::gp inline void print_mutate_stats() { - std::cerr << "Mutation statistics:" << std::endl; + std::cerr << "Mutation statistics (Total: " << (mutate_point_counter + mutate_expression_counter + mutate_adjust_counter + + mutate_sub_func_counter + mutate_jump_counter + mutate_copy_counter) << "):" << std::endl; std::cerr << "\tSuccessful Point Mutations: " << mutate_point_counter << std::endl; std::cerr << "\tSuccessful Expression Mutations: " << mutate_expression_counter << std::endl; std::cerr << "\tSuccessful Adjust Mutations: " << mutate_adjust_counter << std::endl; std::cerr << "\tSuccessful Sub Func Mutations: " << mutate_sub_func_counter << std::endl; - std::cerr << "\tSuccessful Func Jump Mutations: " << mutate_jump_counter << std::endl; + std::cerr << "\tSuccessful Jump Mutations: " << mutate_jump_counter << std::endl; std::cerr << "\tSuccessful Copy Mutations: " << mutate_copy_counter << std::endl; } +#ifdef BLT_TRACK_ALLOCATIONS + + struct run_me_baby + { + ~run_me_baby() + { + print_mutate_stats(); + } + }; + + run_me_baby this_will_run_when_program_exits; +#endif #endif grow_generator_t grow_generator; - inline tree_t& get_static_tree_tl(gp_program& program) - { - thread_local tree_t new_tree{program}; - new_tree.clear(program); - return new_tree; - } - - // TODO: consolidate the two copies of this. other is in tree.cpp - template - static u8* get_thread_pointer_for_size(const size_t bytes) - { - thread_local expanding_buffer buffer; - if (bytes > buffer.size()) - buffer.resize(bytes); - return buffer.data(); - } - mutation_t::config_t::config_t(): generator(grow_generator) { } @@ -147,7 +143,7 @@ namespace blt::gp size_t mutation_t::mutate_point(gp_program& program, tree_t& c, const tree_t::subtree_point_t node) const { - auto& new_tree = get_static_tree_tl(program); + auto& new_tree = tree_t::get_thread_local(program); config.generator.get().generate(new_tree, {program, node.type, config.replacement_min_depth, config.replacement_max_depth}); c.replace_subtree(node, new_tree); @@ -159,6 +155,8 @@ namespace blt::gp print_mutate_stats(); throw std::runtime_error("Mutate Point tree check failed"); } +#endif +#if defined(BLT_TRACK_ALLOCATIONS) || BLT_DEBUG_LEVEL >= 2 ++mutate_point_counter; #endif return node.pos + new_tree.size(); @@ -177,23 +175,13 @@ namespace blt::gp // select an operator to apply auto selected_point = static_cast(mutation_operator::COPY); auto choice = program.get_random().get_double(); - for (const auto& [index, value] : blt::enumerate(mutation_operator_chances)) + + for (const auto& [index, value] : enumerate(mutation_operator_chances)) { - if (index == 0) + if (choice <= value) { - if (choice <= value) - { - selected_point = static_cast(index); - break; - } - } - else - { - if (choice > mutation_operator_chances[index - 1] && choice <= value) - { - selected_point = static_cast(index); - break; - } + selected_point = static_cast(index); + break; } } @@ -201,7 +189,7 @@ namespace blt::gp { case mutation_operator::EXPRESSION: c_node += mutate_point(program, c, c.subtree_from_point(static_cast(c_node))); -#if BLT_DEBUG_LEVEL >= 2 +#if BLT_TRACK_ALLOCATIONS || BLT_DEBUG_LEVEL >= 2 ++mutate_expression_counter; #endif break; @@ -228,7 +216,7 @@ namespace blt::gp if (index < current_func_info.argument_types.size() && val.id != current_func_info.argument_types[index].id) { // TODO: new config? - auto& tree = get_static_tree_tl(program); + auto& tree = tree_t::get_thread_local(program); config.generator.get().generate(tree, {program, val.id, config.replacement_min_depth, config.replacement_max_depth}); @@ -251,15 +239,6 @@ namespace blt::gp } } child_end = static_cast(child_start + tree.size()); - -#if BLT_DEBUG_LEVEL >= 2 - if (!c.check(detail::debug::context_ptr)) - { - print_mutate_stats(); - throw std::runtime_error("Adjust Tree check failed"); - } - ++mutate_adjust_counter; -#endif } } @@ -285,7 +264,7 @@ namespace blt::gp for (ptrdiff_t i = static_cast(replacement_func_info.argc.argc) - 1; i >= current_func_info.argc.argc; i--) { - auto& tree = get_static_tree_tl(program); + auto& tree = tree_t::get_thread_local(program); config.generator.get().generate(tree, { program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, @@ -308,6 +287,8 @@ namespace blt::gp print_mutate_stats(); BLT_ABORT("Adjust Tree Check Failed."); } +#endif +#if defined(BLT_TRACK_ALLOCATIONS) || BLT_DEBUG_LEVEL >= 2 ++mutate_adjust_counter; #endif } @@ -358,7 +339,7 @@ namespace blt::gp size_t start_index = c_node; for (ptrdiff_t i = new_argc - 1; i > static_cast(arg_position); i--) { - auto& tree = get_static_tree_tl(program); + auto& tree = tree_t::get_thread_local(program); config.generator.get().generate(tree, { program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, @@ -370,7 +351,7 @@ namespace blt::gp // vals.copy_from(combined_ptr, for_bytes); for (blt::ptrdiff_t i = static_cast(arg_position) - 1; i >= 0; i--) { - auto& tree = get_static_tree_tl(program); + auto& tree = tree_t::get_thread_local(program); config.generator.get().generate(tree, { program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, @@ -397,6 +378,8 @@ namespace blt::gp print_mutate_stats(); BLT_ABORT("SUB_FUNC Tree Check Failed."); } +#endif +#if defined(BLT_TRACK_ALLOCATIONS) || BLT_DEBUG_LEVEL >= 2 ++mutate_sub_func_counter; #endif } @@ -469,6 +452,8 @@ namespace blt::gp print_mutate_stats(); BLT_ABORT("JUMP_FUNC Tree Check Failed."); } +#endif +#if defined(BLT_TRACK_ALLOCATIONS) || BLT_DEBUG_LEVEL >= 2 ++mutate_jump_counter; #endif } @@ -476,55 +461,36 @@ namespace blt::gp case mutation_operator::COPY: { auto& info = program.get_operator_info(c.get_operator(c_node).id()); - size_t pt = -1ul; - size_t pf = -1ul; - for (const auto& [index, v] : blt::enumerate(info.argument_types)) - { - for (size_t i = index + 1; i < info.argument_types.size(); i++) - { - auto& v1 = info.argument_types[i]; - if (v == v1) - { - if (pt == -1ul) - pt = index; - else - pf = index; - break; - } - } - if (pt != -1ul && pf != -1ul) - break; - } - if (pt == -1ul || pf == -1ul) + if (c.get_operator(c_node).is_value()) continue; + thread_local tracked_vector potential_indexes; + potential_indexes.clear(); - size_t from = 0; - size_t to = 0; - - if (program.get_random().choice()) + const auto from_index = program.get_random().get_u64(0, info.argument_types.size()); + for (const auto [index, type] : enumerate(info.argument_types)) { - from = pt; - to = pf; - } - else - { - from = pf; - to = pt; + if (index == from_index) + continue; + if (info.argument_types[from_index] == type) + potential_indexes.push_back(index); } + if (potential_indexes.empty()) + continue; + const auto to_index = program.get_random().select(potential_indexes); thread_local tracked_vector child_data; child_data.clear(); c.find_child_extends(child_data, c_node, info.argument_types.size()); - auto from_index = child_data.size() - 1 - from; - auto to_index = child_data.size() - 1 - to; - auto& from_child = child_data[from_index]; - auto& to_child = child_data[to_index]; + const auto child_from_index = child_data.size() - 1 - from_index; + const auto child_to_index = child_data.size() - 1 - to_index; + const auto& [from_start, from_end] = child_data[child_from_index]; + const auto& [to_start, to_end] = child_data[child_to_index]; thread_local tree_t copy_tree{program}; - c.copy_subtree(tree_t::subtree_point_t{from_child.start}, from_child.end, copy_tree); - c.replace_subtree(tree_t::subtree_point_t{to_child.start}, to_child.end, copy_tree); + c.copy_subtree(tree_t::subtree_point_t{from_start}, from_end, copy_tree); + c.replace_subtree(tree_t::subtree_point_t{to_start}, to_end, copy_tree); copy_tree.clear(program); #if BLT_DEBUG_LEVEL >= 2 @@ -538,6 +504,8 @@ namespace blt::gp print_mutate_stats(); BLT_ABORT("COPY Tree Check Failed."); } +#endif +#if defined(BLT_TRACK_ALLOCATIONS) || BLT_DEBUG_LEVEL >= 2 ++mutate_copy_counter; #endif diff --git a/src/tree.cpp b/src/tree.cpp index 3f768bc..f45a6a7 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -501,7 +501,6 @@ namespace blt::gp tree_t& tree_t::get_thread_local(gp_program& program) { thread_local tree_t tree{program}; - tree.clear(program); return tree; }