From 928f73d9e0d0b1bbf6b3012d7980a75ee9c318f6 Mon Sep 17 00:00:00 2001 From: Brett Laptop Date: Fri, 16 Aug 2024 19:38:27 -0400 Subject: [PATCH] add new advanced mutation operator --- CMakeLists.txt | 6 +- examples/symbolic_regression.cpp | 2 +- include/blt/gp/transformers.h | 67 ++++- include/blt/gp/tree.h | 1 - lib/blt | 2 +- src/program.cpp | 2 +- src/transformers.cpp | 427 +++++++++++++++++++++++++++++++ src/tree.cpp | 13 + tests/stack_tests.cpp | 2 +- 9 files changed, 513 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0fd941e..0fe62b0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.0.143) +project(blt-gp VERSION 0.0.144) include(CTest) @@ -24,8 +24,8 @@ file(GLOB_RECURSE PROJECT_BUILD_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp") add_library(blt-gp ${PROJECT_BUILD_FILES}) -target_compile_options(blt-gp PRIVATE -Wall -Wextra -Werror -Wpedantic -Wno-comment) -target_link_options(blt-gp PRIVATE -Wall -Wextra -Werror -Wpedantic -Wno-comment) +target_compile_options(blt-gp PRIVATE -Wall -Wextra -Wpedantic -Wno-comment) +target_link_options(blt-gp PRIVATE -Wall -Wextra -Wpedantic -Wno-comment) target_include_directories(blt-gp PUBLIC include/) diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp index 5dd751b..9eec77c 100644 --- a/examples/symbolic_regression.cpp +++ b/examples/symbolic_regression.cpp @@ -39,7 +39,7 @@ blt::gp::prog_config_t config = blt::gp::prog_config_t() .set_mutation_chance(0.1) .set_reproduction_chance(0) .set_max_generations(50) - .set_pop_size(500) + .set_pop_size(5000) .set_thread_count(0); blt::gp::type_provider type_system; diff --git a/include/blt/gp/transformers.h b/include/blt/gp/transformers.h index da011a3..a2524f1 100644 --- a/include/blt/gp/transformers.h +++ b/include/blt/gp/transformers.h @@ -30,6 +30,30 @@ namespace blt::gp namespace detail { using op_iter = std::vector::iterator; + + template + inline static constexpr double sum(const T& array) + { + double init = 0.0; + for (double i : array) + init += i; + return init; + } + + template + static constexpr std::array aggregate_array(Args... list) + { + std::array data{list...}; + auto total_prob = sum(data); + double sum_of_prob = 0; + for (auto& d : data) + { + auto prob = d / total_prob; + d = prob + sum_of_prob; + sum_of_prob += prob; + } + return data; + } } class crossover_t @@ -104,7 +128,7 @@ namespace blt::gp explicit mutation_t(const config_t& config): config(config) {} - virtual tree_t apply(gp_program& program, const tree_t& p); // NOLINT + virtual tree_t apply(gp_program& program, const tree_t& p); // returns the point after the mutation blt::size_t mutate_point(gp_program& program, tree_t& c, blt::size_t node); @@ -115,6 +139,47 @@ namespace blt::gp config_t config; }; + class advanced_mutation_t : public mutation_t + { + public: + enum class mutation_operator : blt::i32 + { + EXPRESSION, // Generate a new random expression + ADJUST, // adjust the value of the type. (if it is a function it will mutate it to a different one) + SUB_FUNC, // subexpression becomes argument to new random function. Other args are generated. + JUMP_FUNC, // subexpression becomes this new node. Other arguments discarded. + COPY, // node can become copy of another subexpression. + END, // helper + }; + + advanced_mutation_t() = default; + + explicit advanced_mutation_t(const config_t& config): mutation_t(config) + {} + + tree_t apply(gp_program& program, const tree_t& p) final; + + advanced_mutation_t& set_per_node_mutation_chance(double v) + { + per_node_mutation_chance = v; + return *this; + } + + private: + static constexpr auto operators_size = static_cast(mutation_operator::END); + private: + // this value is adjusted inversely to the size of the tree. + double per_node_mutation_chance = 5.0; + + static constexpr std::array mutation_operator_chances = detail::aggregate_array( + 0.1, // EXPRESSION + 0.25, // ADJUST + 0.01, // SUB_FUNC + 0.25, // JUMP_FUNC + 0.12 // COPY + ); +}; + } #endif //BLT_GP_TRANSFORMERS_H diff --git a/include/blt/gp/tree.h b/include/blt/gp/tree.h index 176202e..97eca1d 100644 --- a/include/blt/gp/tree.h +++ b/include/blt/gp/tree.h @@ -40,7 +40,6 @@ namespace blt::gp std::reference_wrapper func; blt::size_t type_size; - //std::reference_wrapper transfer; operator_id id; bool is_value; }; diff --git a/lib/blt b/lib/blt index 6f06647..941aa68 160000 --- a/lib/blt +++ b/lib/blt @@ -1 +1 @@ -Subproject commit 6f06647a21f7f29e99ef8e45e1d3c08db0b46038 +Subproject commit 941aa6809c92f05c64ca6624d5898958cfac496d diff --git a/src/program.cpp b/src/program.cpp index c8eb40c..9f4b023 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -23,7 +23,7 @@ namespace blt::gp // default static references for mutation, crossover, and initializer // this is largely to not break the tests :3 // it's also to allow for quick setup of a gp program if you don't care how crossover or mutation is handled - static mutation_t s_mutator; + static advanced_mutation_t s_mutator; static crossover_t s_crossover; static ramped_half_initializer_t s_init; diff --git a/src/transformers.cpp b/src/transformers.cpp index e30e034..c13df5d 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -295,4 +295,431 @@ namespace blt::gp #endif return begin_point + new_ops_r.size(); } + + tree_t advanced_mutation_t::apply(gp_program& program, const tree_t& p) + { + // child tree + tree_t c = p; + + auto& ops = c.get_operations(); + auto& vals = c.get_values(); + + for (blt::size_t c_node = 0; c_node < ops.size(); c_node++) + { + double node_mutation_chance = per_node_mutation_chance / static_cast(ops.size()); + if (!program.get_random().choice(node_mutation_chance)) + continue; + +#if BLT_DEBUG_LEVEL >= 2 + tree_t c_copy = c; +#endif + + // select an operator to apply + auto selected_point = static_cast(mutation_operator::COPY); + auto choice = program.get_random().get_double(); + for (const auto& [index, value] : blt::enumerate(mutation_operator_chances)) + { + if (index == 0) + { + if (choice <= value) + { + selected_point = static_cast(index); + break; + } + } else + { + if (choice > mutation_operator_chances[index - 1] && choice <= value) + { + selected_point = static_cast(index); + break; + } + } + } + + switch (static_cast(selected_point)) + { + case mutation_operator::EXPRESSION: + c_node += mutate_point(program, c, c_node); + break; + case mutation_operator::ADJUST: + { + // this is going to be evil >:3 + const auto& node = ops[c_node]; + if (!node.is_value) + { + auto& current_func_info = program.get_operator_info(ops[c_node].id); + operator_id random_replacement = program.get_random().select( + program.get_type_non_terminals(current_func_info.return_type.id)); + auto& replacement_func_info = program.get_operator_info(random_replacement); + + // cache memory used for offset data. + thread_local static std::vector children_data; + children_data.clear(); + + c.find_child_extends(program, children_data, c_node, current_func_info.argument_types.size()); + + for (const auto& [index, val] : blt::enumerate(replacement_func_info.argument_types)) + { + // need to generate replacement. + if (index < current_func_info.argument_types.size() && val.id != current_func_info.argument_types[index].id) + { + // TODO: new config? + auto tree = config.generator.get().generate( + {program, val.id, config.replacement_min_depth, config.replacement_max_depth}); + + auto& child = children_data[children_data.size() - 1 - index]; + blt::size_t total_bytes_for = c.total_value_bytes(child.start, child.end); + blt::size_t total_bytes_after = c.total_value_bytes(child.end); + + auto after_ptr = get_thread_pointer_for_size(total_bytes_after); + vals.copy_to(after_ptr, total_bytes_after); + vals.pop_bytes(static_cast(total_bytes_after + total_bytes_for)); + + blt::size_t total_child_bytes = tree.total_value_bytes(); + + vals.copy_from(tree.get_values(), total_child_bytes); + vals.copy_from(after_ptr, total_bytes_after); + + ops.erase(ops.begin() + child.start, ops.begin() + child.end); + ops.insert(ops.begin() + child.start, tree.get_operations().begin(), tree.get_operations().end()); + + // shift over everybody after. + if (index > 0) + { + // don't need to update if the index is the last + for (auto& new_child : blt::iterate(children_data.end() - static_cast(index), + children_data.end())) + { + // remove the old tree size, then add the new tree size to get the correct positions. + new_child.start = + new_child.start - (child.end - child.start) + + static_cast(tree.get_operations().size()); + new_child.end = + new_child.end - (child.end - child.start) + static_cast(tree.get_operations().size()); + } + } + child.end = static_cast(child.start + tree.get_operations().size()); + +#if BLT_DEBUG_LEVEL >= 2 + blt::size_t found_bytes = vals.size().total_used_bytes; + blt::size_t expected_bytes = std::accumulate(ops.begin(), + ops.end(), 0ul, + [](const auto& v1, const auto& v2) { + if (v2.is_value) + return v1 + stack_allocator::aligned_size(v2.type_size); + return v1; + }); + if (found_bytes != expected_bytes) + { + BLT_WARN("Found bytes %ld vs Expected Bytes %ld", found_bytes, expected_bytes); + BLT_ABORT("Amount of bytes in stack doesn't match the number of bytes expected for the operations"); + } +#endif + } + } + + if (current_func_info.argc.argc > replacement_func_info.argc.argc) + { + blt::size_t end_index = children_data[(current_func_info.argc.argc - replacement_func_info.argc.argc) - 1].end; + blt::size_t start_index = children_data.begin()->start; + blt::size_t total_bytes_for = c.total_value_bytes(start_index, end_index); + blt::size_t total_bytes_after = c.total_value_bytes(end_index); + auto* data = get_thread_pointer_for_size(total_bytes_after); + vals.copy_to(data, total_bytes_after); + vals.pop_bytes(static_cast(total_bytes_after + total_bytes_for)); + vals.copy_from(data, total_bytes_after); + ops.erase(ops.begin() + static_cast(start_index), ops.begin() + static_cast(end_index)); + } else if (current_func_info.argc.argc == replacement_func_info.argc.argc) + { + // exactly enough args + // return types should have been replaced if needed. this part should do nothing? + } else + { + // not enough args + blt::size_t start_index = c_node + 1; + blt::size_t total_bytes_after = c.total_value_bytes(start_index); + auto* data = get_thread_pointer_for_size(total_bytes_after); + vals.copy_to(data, total_bytes_after); + vals.pop_bytes(static_cast(total_bytes_after)); + + for (blt::ptrdiff_t i = static_cast(replacement_func_info.argc.argc) - 1; + i >= current_func_info.argc.argc; i--) + { + auto tree = config.generator.get().generate( + {program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, + config.replacement_max_depth}); + blt::size_t total_bytes_for = tree.total_value_bytes(); + vals.copy_from(tree.get_values(), total_bytes_for); + ops.insert(ops.begin() + static_cast(start_index), tree.get_operations().begin(), + tree.get_operations().end()); + start_index += tree.get_operations().size(); + } + vals.copy_from(data, total_bytes_after); + } + // now finally update the type. + ops[c_node] = {replacement_func_info.function, program.get_typesystem().get_type(replacement_func_info.return_type).size(), + random_replacement, program.is_static(random_replacement)}; + } +#if BLT_DEBUG_LEVEL >= 2 + if (!c.check(program, nullptr)) + { + std::cout << "Parent: " << std::endl; + c_copy.print(program, std::cout, false, true); + std::cout << "Child Values:" << std::endl; + c.print(program, std::cout, true, true); + std::cout << std::endl; + BLT_ABORT("Tree Check Failed."); + } +#endif + } + break; + case mutation_operator::SUB_FUNC: + { + auto& current_func_info = program.get_operator_info(ops[c_node].id); + + // need to: + // mutate the current function. + // current function is moved to one of the arguments. + // other arguments are generated. + + // get a replacement which returns the same type. + auto& non_terminals = program.get_type_non_terminals(current_func_info.return_type.id); + if (non_terminals.empty()) + continue; + operator_id random_replacement = program.get_random().select(non_terminals); + blt::size_t arg_position = 0; + do + { + auto& replacement_func_info = program.get_operator_info(random_replacement); + for (const auto& [index, v] : blt::enumerate(replacement_func_info.argument_types)) + { + if (v.id == current_func_info.return_type.id) + { + arg_position = index; + goto exit; + } + } + random_replacement = program.get_random().select(program.get_type_non_terminals(current_func_info.return_type.id)); + } while (true); + exit: + auto& replacement_func_info = program.get_operator_info(random_replacement); + auto new_argc = replacement_func_info.argc.argc; + // replacement function should be valid. let's make a copy of us. + auto current_end = c.find_endpoint(program, static_cast(c_node)); + blt::size_t for_bytes = c.total_value_bytes(c_node, current_end); + blt::size_t after_bytes = c.total_value_bytes(current_end); + auto size = current_end - c_node; + + auto combined_ptr = get_thread_pointer_for_size(for_bytes + after_bytes); + + vals.copy_to(combined_ptr, for_bytes + after_bytes); + vals.pop_bytes(static_cast(for_bytes + after_bytes)); + + blt::size_t start_index = c_node; + for (blt::ptrdiff_t i = new_argc - 1; i > static_cast(arg_position); i--) + { + auto tree = config.generator.get().generate( + {program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, + config.replacement_max_depth}); + blt::size_t total_bytes_for = tree.total_value_bytes(); + vals.copy_from(tree.get_values(), total_bytes_for); + ops.insert(ops.begin() + static_cast(start_index), tree.get_operations().begin(), + tree.get_operations().end()); + start_index += tree.get_operations().size(); + } + start_index += size; + vals.copy_from(combined_ptr, for_bytes); + for (blt::ptrdiff_t i = static_cast(arg_position) - 1; i >= 0; i--) + { + auto tree = config.generator.get().generate( + {program, replacement_func_info.argument_types[i].id, config.replacement_min_depth, + config.replacement_max_depth}); + blt::size_t total_bytes_for = tree.total_value_bytes(); + vals.copy_from(tree.get_values(), total_bytes_for); + ops.insert(ops.begin() + static_cast(start_index), tree.get_operations().begin(), + tree.get_operations().end()); + start_index += tree.get_operations().size(); + } + vals.copy_from(combined_ptr + for_bytes, after_bytes); + + ops.insert(ops.begin() + static_cast(c_node), + {replacement_func_info.function, program.get_typesystem().get_type(replacement_func_info.return_type).size(), + random_replacement, program.is_static(random_replacement)}); + +#if BLT_DEBUG_LEVEL >= 2 + if (!c.check(program, nullptr)) + { + std::cout << "Parent: " << std::endl; + p.print(program, std::cout, false, true); + std::cout << "Child:" << std::endl; + c.print(program, std::cout, false, true); + std::cout << "Child Values:" << std::endl; + c.print(program, std::cout, true, true); + std::cout << std::endl; + BLT_ABORT("Tree Check Failed."); + } +#endif + } + break; + case mutation_operator::JUMP_FUNC: + { + auto& info = program.get_operator_info(ops[c_node].id); + blt::size_t argument_index = -1ul; + for (const auto& [index, v] : blt::enumerate(info.argument_types)) + { + if (v.id == info.return_type.id) + { + argument_index = index; + break; + } + } + if (argument_index == -1ul) + continue; + + static thread_local std::vector child_data; + child_data.clear(); + + c.find_child_extends(program, child_data, c_node, info.argument_types.size()); + + auto child_index = child_data.size() - 1 - argument_index; + auto child = child_data[child_index]; + auto for_bytes = c.total_value_bytes(child.start, child.end); + auto after_bytes = c.total_value_bytes(child_data.back().end); + + auto storage_ptr = get_thread_pointer_for_size(for_bytes + after_bytes); + vals.copy_to(storage_ptr + for_bytes, after_bytes); + vals.pop_bytes(static_cast(after_bytes)); + + for (auto i = static_cast(child_data.size() - 1); i > static_cast(child_index); i--) + { + auto& cc = child_data[i]; + auto bytes = c.total_value_bytes(cc.start, cc.end); + vals.pop_bytes(static_cast(bytes)); + ops.erase(ops.begin() + cc.start, ops.begin() + cc.end); + } + vals.copy_to(storage_ptr, for_bytes); + vals.pop_bytes(static_cast(for_bytes)); + for (auto i = static_cast(child_index - 1); i >= 0; i--) + { + auto& cc = child_data[i]; + auto bytes = c.total_value_bytes(cc.start, cc.end); + vals.pop_bytes(static_cast(bytes)); + ops.erase(ops.begin() + cc.start, ops.begin() + cc.end); + } + ops.erase(ops.begin() + static_cast(c_node)); + vals.copy_from(storage_ptr, for_bytes + after_bytes); + +#if BLT_DEBUG_LEVEL >= 2 + if (!c.check(program, nullptr)) + { + std::cout << "Parent: " << std::endl; + p.print(program, std::cout, false, true); + std::cout << "Child Values:" << std::endl; + c.print(program, std::cout, true, true); + std::cout << std::endl; + BLT_ABORT("Tree Check Failed."); + } +#endif + } + break; + case mutation_operator::COPY: + { + auto& info = program.get_operator_info(ops[c_node].id); + blt::size_t pt = -1ul; + blt::size_t pf = -1ul; + for (const auto& [index, v] : blt::enumerate(info.argument_types)) + { + for (blt::size_t i = index + 1; i < info.argument_types.size(); i++) + { + auto& v1 = info.argument_types[i]; + if (v == v1) + { + if (pt == -1ul) + pt = index; + else + pf = index; + break; + } + } + if (pt != -1ul && pf != -1ul) + break; + } + if (pt == -1ul || pf == -1ul) + continue; + + blt::size_t from = 0; + blt::size_t to = 0; + + if (program.get_random().choice()) + { + from = pt; + to = pf; + } else + { + from = pf; + to = pt; + } + + static thread_local std::vector child_data; + child_data.clear(); + + c.find_child_extends(program, child_data, c_node, info.argument_types.size()); + + auto from_index = child_data.size() - 1 - from; + auto to_index = child_data.size() - 1 - to; + auto& from_child = child_data[from_index]; + auto& to_child = child_data[to_index]; + blt::size_t from_bytes = c.total_value_bytes(from_child.start, from_child.end); + blt::size_t after_from_bytes = c.total_value_bytes(from_child.end); + blt::size_t to_bytes = c.total_value_bytes(to_child.start, to_child.end); + blt::size_t after_to_bytes = c.total_value_bytes(to_child.end); + + auto after_bytes = std::max(after_from_bytes, after_to_bytes); + + auto from_ptr = get_thread_pointer_for_size(from_bytes); + auto after_ptr = get_thread_pointer_for_size(after_bytes); + + vals.copy_to(after_ptr, after_from_bytes); + vals.pop_bytes(static_cast(after_from_bytes)); + vals.copy_to(from_ptr, from_bytes); + vals.copy_from(after_ptr, after_from_bytes); + + vals.copy_to(after_ptr, after_to_bytes); + vals.pop_bytes(static_cast(after_to_bytes + to_bytes)); + + vals.copy_from(from_ptr, from_bytes); + vals.copy_from(after_ptr, after_to_bytes); + + static std::vector op_copy; + op_copy.clear(); + op_copy.insert(op_copy.begin(), ops.begin() + from_child.start, ops.begin() + from_child.end); + + ops.erase(ops.begin() + to_child.start, ops.begin() + to_child.end); + ops.insert(ops.begin() + to_child.start, op_copy.begin(), op_copy.end()); + } + break; + case mutation_operator::END: + default: +#if BLT_DEBUG_LEVEL > 1 + BLT_ABORT("You shouldn't be able to get here!"); +#else + BLT_UNREACHABLE; +#endif + } + } + +#if BLT_DEBUG_LEVEL >= 2 + if (!c.check(program, nullptr)) + { + std::cout << "Parent: " << std::endl; + p.print(program, std::cout, false, true); + std::cout << "Child Values:" << std::endl; + c.print(program, std::cout, true, true); + std::cout << std::endl; + BLT_ABORT("Tree Check Failed."); + } +#endif + + return c; + } } \ No newline at end of file diff --git a/src/tree.cpp b/src/tree.cpp index 9ec026f..46cd827 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -24,6 +24,19 @@ namespace blt::gp { + // this one will copy previous bytes over + template + blt::span get_pointer_for_size(blt::size_t size) + { + static blt::span buffer{nullptr, 0}; + if (buffer.size() < size) + { + delete[] buffer.data(); + buffer = {new blt::u8[size], size}; + } + return buffer; + } + inline auto empty_callable = detail::callable_t( [](void*, stack_allocator&, stack_allocator&, detail::bitmask_t*) { BLT_ABORT("This should never be called!"); }); diff --git a/tests/stack_tests.cpp b/tests/stack_tests.cpp index 10e9995..b50ab24 100644 --- a/tests/stack_tests.cpp +++ b/tests/stack_tests.cpp @@ -33,7 +33,7 @@ struct log_box ~log_box() { - for (auto& _ : text) + for ([[maybe_unused]] auto& _ : text) logger << '-'; logger << '\n'; }