diff --git a/.idea/editor.xml b/.idea/editor.xml index 04cdbc9..521c365 100644 --- a/.idea/editor.xml +++ b/.idea/editor.xml @@ -1,246 +1,249 @@ - \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..38dc036 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,8 @@ + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 8dec521..7c22918 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ macro(compile_options target_name) sanitizers(${target_name}) endmacro() -project(blt-gp VERSION 0.4.9) +project(blt-gp VERSION 0.5.28) include(CTest) @@ -37,7 +37,8 @@ option(ENABLE_TSAN "Enable the thread data race sanitizer" OFF) option(BUILD_EXAMPLES "Build example programs. This will build with CTest" OFF) option(BUILD_GP_TESTS "Build test programs." OFF) option(DEBUG_LEVEL "Enable debug features which prints extra information to the console, might slow processing down. [0, 3)" 0) -option(TRACK_ALLOCATIONS "Track total allocations. Can be accessed with blt::gp::tracker" OFF) +option(BLT_GP_DEBUG_CHECK_TREES "Enable checking of trees after every operation" OFF) +option(BLT_GP_DEBUG_TRACK_ALLOCATIONS "Track total allocations. 
Can be accessed with blt::gp::tracker" OFF) set(CMAKE_CXX_STANDARD 17) @@ -122,5 +123,6 @@ if (${BUILD_GP_TESTS}) blt_add_project(blt-symbolic-regression tests/symbolic_regression_test.cpp test) blt_add_project(blt-drop tests/drop_test.cpp test) blt_add_project(blt-drop-2-type tests/2_type_drop_test.cpp test) + blt_add_project(blt-serialization tests/serialization_test.cpp test) endif () \ No newline at end of file diff --git a/CrossoverOperatorApplication.cpp b/CrossoverOperatorApplication.cpp new file mode 100644 index 0000000..b30ac99 --- /dev/null +++ b/CrossoverOperatorApplication.cpp @@ -0,0 +1,230 @@ +bool type_aware_crossover_t::apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) +{ + if (p1.size() < config.min_tree_size || p2.size() < config.min_tree_size) + return false; + + tree_t::subtree_point_t point1, point2; + if (config.traverse) + { + point1 = p1.select_subtree_traverse(config.terminal_chance, config.depth_multiplier); + if (const auto val = p2.select_subtree_traverse(point1.type, config.max_crossover_tries, config.terminal_chance, config.depth_multiplier)) + point2 = *val; + else + return false; + } else + { + point1 = p1.select_subtree(config.terminal_chance); + if (const auto val = p2.select_subtree(point1.type, config.max_crossover_tries, config.terminal_chance)) + point2 = *val; + else + return false; + } + + const auto& p1_operator = p1.get_operator(point1.pos); + const auto& p2_operator = p2.get_operator(point2.pos); + + // If either is a terminal (value), just do normal subtree crossover + if (p1_operator.is_value() || p2_operator.is_value()) + { + c1.swap_subtrees(point1, c2, point2); + return true; + } + + const auto& p1_info = program.get_operator_info(p1_operator.id()); + const auto& p2_info = program.get_operator_info(p2_operator.id()); + + // Find the child subtrees of both operators + thread_local tracked_vector children_data_p1; + thread_local tracked_vector children_data_p2; + 
children_data_p1.clear(); + children_data_p2.clear(); + + p1.find_child_extends(children_data_p1, point1.pos, p1_info.argument_types.size()); + p2.find_child_extends(children_data_p2, point2.pos, p2_info.argument_types.size()); + + // Check if all types are identical but possibly in different order + bool same_types_different_order = p1_info.argument_types.size() == p2_info.argument_types.size(); + + if (same_types_different_order) + { + // Create frequency counts of types in both operators + std::unordered_map type_counts_p1; + std::unordered_map type_counts_p2; + + for (const auto& type : p1_info.argument_types) + type_counts_p1[type.id]++; + + for (const auto& type : p2_info.argument_types) + type_counts_p2[type.id]++; + + // Check if the type counts match + for (const auto& [type, count] : type_counts_p1) + { + if (type_counts_p2[type] != count) + { + same_types_different_order = false; + break; + } + } + } + + if (same_types_different_order) + { + // Create a mapping from p1's argument positions to p2's positions + std::vector arg_mapping(p1_info.argument_types.size(), (size_t)-1); + std::vector p2_used(p2_info.argument_types.size(), false); + + // First pass: match exact types in order + for (size_t i = 0; i < p1_info.argument_types.size(); i++) + { + for (size_t j = 0; j < p2_info.argument_types.size(); j++) + { + if (!p2_used[j] && p1_info.argument_types[i].id == p2_info.argument_types[j].id) + { + arg_mapping[i] = j; + p2_used[j] = true; + break; + } + } + } + + // Copy operators first + auto& c1_temp = tree_t::get_thread_local(program); + auto& c2_temp = tree_t::get_thread_local(program); + c1_temp.clear(program); + c2_temp.clear(program); + + // Create new operators with the same return types + c1_temp.insert_operator({ + program.get_typesystem().get_type(p2_info.return_type).size(), + p2_operator.id(), + program.is_operator_ephemeral(p2_operator.id()), + program.get_operator_flags(p2_operator.id()) + }); + + c2_temp.insert_operator({ + 
program.get_typesystem().get_type(p1_info.return_type).size(), + p1_operator.id(), + program.is_operator_ephemeral(p1_operator.id()), + program.get_operator_flags(p1_operator.id()) + }); + + // Copy child subtrees according to the mapping + for (size_t i = 0; i < p1_info.argument_types.size(); i++) + { + auto& p1_child = children_data_p1[i]; + auto& p2_child = children_data_p2[arg_mapping[i]]; + + tree_t p1_subtree(program); + tree_t p2_subtree(program); + + p1.copy_subtree(tree_t::subtree_point_t(p1_child.start), p1_child.end, p1_subtree); + p2.copy_subtree(tree_t::subtree_point_t(p2_child.start), p2_child.end, p2_subtree); + + c1_temp.insert_subtree(tree_t::subtree_point_t(c1_temp.size()), p2_subtree); + c2_temp.insert_subtree(tree_t::subtree_point_t(c2_temp.size()), p1_subtree); + } + + // Replace the original subtrees with our new reordered ones + c1.replace_subtree(point1, c1_temp); + c2.replace_subtree(point2, c2_temp); + } + else + { + // If types don't match exactly, fall back to simple operator swap + // but we need to ensure the children are compatible + + // Create new operators with swapped operators but appropriate children + auto& c1_temp = tree_t::get_thread_local(program); + auto& c2_temp = tree_t::get_thread_local(program); + c1_temp.clear(program); + c2_temp.clear(program); + + c1_temp.insert_operator({ + program.get_typesystem().get_type(p2_info.return_type).size(), + p2_operator.id(), + program.is_operator_ephemeral(p2_operator.id()), + program.get_operator_flags(p2_operator.id()) + }); + + c2_temp.insert_operator({ + program.get_typesystem().get_type(p1_info.return_type).size(), + p1_operator.id(), + program.is_operator_ephemeral(p1_operator.id()), + program.get_operator_flags(p1_operator.id()) + }); + + // Create a mapping of which children we can reuse and which need to be regenerated + for (size_t i = 0; i < p2_info.argument_types.size(); i++) + { + const auto& needed_type = p2_info.argument_types[i]; + bool found_match = false; + + // Try 
to find a matching child from p1 + for (size_t j = 0; j < p1_info.argument_types.size(); j++) + { + if (needed_type.id == p1_info.argument_types[j].id) + { + // Copy this child subtree from p1 + auto& p1_child = children_data_p1[j]; + tree_t p1_subtree(program); + p1.copy_subtree(tree_t::subtree_point_t(p1_child.start), p1_child.end, p1_subtree); + c1_temp.insert_subtree(tree_t::subtree_point_t(c1_temp.size()), p1_subtree); + found_match = true; + break; + } + } + + if (!found_match) + { + // If no matching child, we need to generate a new subtree of the correct type + auto& tree = tree_t::get_thread_local(program); + tree.clear(program); + config.generator.get().generate(tree, {program, needed_type.id, config.replacement_min_depth, config.replacement_max_depth}); + c1_temp.insert_subtree(tree_t::subtree_point_t(c1_temp.size()), tree); + } + } + + // Do the same for the other direction (c2) + for (size_t i = 0; i < p1_info.argument_types.size(); i++) + { + const auto& needed_type = p1_info.argument_types[i]; + bool found_match = false; + + // Try to find a matching child from p2 + for (size_t j = 0; j < p2_info.argument_types.size(); j++) + { + if (needed_type.id == p2_info.argument_types[j].id) + { + // Copy this child subtree from p2 + auto& p2_child = children_data_p2[j]; + tree_t p2_subtree(program); + p2.copy_subtree(tree_t::subtree_point_t(p2_child.start), p2_child.end, p2_subtree); + c2_temp.insert_subtree(tree_t::subtree_point_t(c2_temp.size()), p2_subtree); + found_match = true; + break; + } + } + + if (!found_match) + { + // If no matching child, we need to generate a new subtree of the correct type + auto& tree = tree_t::get_thread_local(program); + tree.clear(program); + config.generator.get().generate(tree, {program, needed_type.id, config.replacement_min_depth, config.replacement_max_depth}); + c2_temp.insert_subtree(tree_t::subtree_point_t(c2_temp.size()), tree); + } + } + + // Replace the original subtrees with our new ones + 
c1.replace_subtree(point1, c1_temp); + c2.replace_subtree(point2, c2_temp); + } + +#if BLT_DEBUG_LEVEL >= 2 + if (!c1.check(detail::debug::context_ptr) || !c2.check(detail::debug::context_ptr)) + throw std::runtime_error("Tree check failed"); +#endif + + return true; +} \ No newline at end of file diff --git a/default.nix b/default.nix new file mode 100644 index 0000000..e91d468 --- /dev/null +++ b/default.nix @@ -0,0 +1,44 @@ +{ pkgs ? (import { + config.allowUnfree = true; + config.segger-jlink.acceptLicense = true; +}), ... }: +pkgs.mkShell +{ + buildInputs = with pkgs; [ + cmake + gcc + clang + emscripten + ninja + renderdoc + valgrind + gtest + opentelemetry-cpp + opentelemetry-cpp.dev + ]; + nativeBuildInputs = with pkgs; [ + pkg-config + opentelemetry-cpp + opentelemetry-cpp.dev + ]; + propagatedBuildInputs = with pkgs; [ + abseil-cpp + protobuf + grpc + prometheus-cpp + prometheus-cpp.dev + openssl + openssl.dev + opentelemetry-cpp + opentelemetry-cpp.dev + civetweb + civetweb.dev + c-ares + c-ares.dev + nlohmann_json + glibc + glibc.dev + curl + ]; + LD_LIBRARY_PATH="/run/opengl-driver/lib:/run/opengl-driver-32/lib"; +} diff --git a/examples/rice_classification.h b/examples/rice_classification.h index 2b03156..c7193df 100644 --- a/examples/rice_classification.h +++ b/examples/rice_classification.h @@ -79,7 +79,7 @@ namespace blt::gp::example BLT_DEBUG("Begin Generation Loop"); while (!program.should_terminate()) { - BLT_TRACE("------------{Begin Generation %ld}------------", program.get_current_generation()); + BLT_TRACE("------------\\{Begin Generation {}}------------", program.get_current_generation()); BLT_TRACE("Creating next generation"); program.create_next_generation(); BLT_TRACE("Move to next generation"); @@ -116,8 +116,8 @@ namespace blt::gp::example mutation_sel = &sel; if (reproduction_sel == nullptr) reproduction_sel = &sel; - program.generate_population(program.get_typesystem().get_type().id(), fitness_function_ref, *crossover_sel, 
*mutation_sel, - *reproduction_sel); + program.generate_initial_population(program.get_typesystem().get_type().id()); + program.setup_generational_evaluation(fitness_function_ref, *crossover_sel, *mutation_sel, *reproduction_sel); } void print_best(const size_t amount = 3) diff --git a/examples/src/rice_classification.cpp b/examples/src/rice_classification.cpp index 8f8b95b..b31325d 100644 --- a/examples/src/rice_classification.cpp +++ b/examples/src/rice_classification.cpp @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -191,7 +191,7 @@ void blt::gp::example::rice_classification_t::load_rice_data(const std::string_v training_cases.insert(training_cases.end(), c.begin(), c.end()); training_cases.insert(training_cases.end(), o.begin(), o.end()); std::shuffle(training_cases.begin(), training_cases.end(), program.get_random()); - BLT_INFO("Created testing set of size %ld, training set is of size %ld", testing_cases.size(), training_cases.size()); + BLT_INFO("Created testing set of size {}, training set is of size {}", testing_cases.size(), training_cases.size()); } blt::gp::confusion_matrix_t blt::gp::example::rice_classification_t::test_individual(const individual_t& individual) const diff --git a/include/blt/gp/allocator.h b/include/blt/gp/allocator.h index 15f78d8..471d80c 100644 --- a/include/blt/gp/allocator.h +++ b/include/blt/gp/allocator.h @@ -29,7 +29,10 @@ namespace blt::gp { namespace detail { - static constexpr inline size_t MAX_ALIGNMENT = 8; +#ifndef BLT_GP_MAX_ALIGNMENT +#define BLT_GP_MAX_ALIGNMENT 8 +#endif + static constexpr inline size_t MAX_ALIGNMENT = BLT_GP_MAX_ALIGNMENT; #if BLT_DEBUG_LEVEL > 0 static void check_alignment(const size_t bytes, const std::string& message = "Invalid alignment") diff --git a/include/blt/gp/defines.h b/include/blt/gp/defines.h new file mode 100644 index 0000000..80b527a --- /dev/null +++ b/include/blt/gp/defines.h @@ -0,0 +1,48 @@ +#pragma once +/* + * Copyright (C) 2024 
Brett Terpstra + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef BLT_GP_DEFINES_H +#define BLT_GP_DEFINES_H + +#include + +#if BLT_DEBUG_LEVEL > 0 + #if defined(__has_include) &&__has_include() + #define BLT_DEBUG_OTEL_ENABLED 1 + #endif +#endif + +#if BLT_DEBUG_LEVEL > 1 + #define BLT_GP_DEBUG_TRACK_ALLOCATIONS +#endif + +#if BLT_DEBUG_LEVEL > 2 + #define BLT_GP_DEBUG_CHECK_TREES +#endif + +#ifdef BLT_GP_DEBUG_TRACK_ALLOCATIONS + #undef BLT_GP_DEBUG_TRACK_ALLOCATIONS + #define BLT_GP_DEBUG_TRACK_ALLOCATIONS +#endif + +#ifdef BLT_GP_DEBUG_CHECK_TREES + #undef BLT_GP_DEBUG_CHECK_TREES + #define BLT_GP_DEBUG_CHECK_TREES 1 +#endif + +#endif //BLT_GP_DEFINES_H diff --git a/include/blt/gp/fwdecl.h b/include/blt/gp/fwdecl.h index 66cef46..40ee4ad 100644 --- a/include/blt/gp/fwdecl.h +++ b/include/blt/gp/fwdecl.h @@ -23,10 +23,6 @@ #include #include #include -#include -#include -#include -#include #include #include diff --git a/include/blt/gp/operations.h b/include/blt/gp/operations.h index 1fe2e69..ec6fb34 100644 --- a/include/blt/gp/operations.h +++ b/include/blt/gp/operations.h @@ -46,7 +46,7 @@ namespace blt::gp void print_args(std::integer_sequence) { BLT_INFO("Arguments:"); - (BLT_INFO("%ld: %s", indices, blt::type_string().c_str()), ...); + (BLT_INFO("{}: {}", indices, blt::type_string().c_str()), ...); } template diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h 
index 4bf6f94..9fe1c59 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -370,24 +370,20 @@ namespace blt::gp #endif } - void reset_program(type_id root_type, bool eval_fitness_now = true) - { - current_generation = 0; - current_pop = config.pop_initializer.get().generate({ - *this, - root_type, - config.population_size, - config.initial_min_tree_size, - config.initial_max_tree_size - }); - next_pop = population_t(current_pop); - BLT_ASSERT_MSG(current_pop.get_individuals().size() == config.population_size, - ("cur pop size: " + std::to_string(current_pop.get_individuals().size())).c_str()); - BLT_ASSERT_MSG(next_pop.get_individuals().size() == config.population_size, - ("next pop size: " + std::to_string(next_pop.get_individuals().size())).c_str()); - if (eval_fitness_now) - evaluate_fitness_internal(); - } + void reset_program(type_id root_type, bool eval_fitness_now = true) + { + current_generation = 0; + current_pop = config.pop_initializer.get().generate({ + *this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size + }); + next_pop = population_t(current_pop); + BLT_ASSERT_MSG(current_pop.get_individuals().size() == config.population_size, + ("cur pop size: " + std::to_string(current_pop.get_individuals().size())).c_str()); + BLT_ASSERT_MSG(next_pop.get_individuals().size() == config.population_size, + ("next pop size: " + std::to_string(next_pop.get_individuals().size())).c_str()); + if (eval_fitness_now) + evaluate_fitness_internal(); + } void kill() { @@ -397,11 +393,7 @@ namespace blt::gp void generate_initial_population(const type_id root_type) { current_pop = config.pop_initializer.get().generate({ - *this, - root_type, - config.population_size, - config.initial_min_tree_size, - config.initial_max_tree_size + *this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size }); next_pop = population_t(current_pop); 
BLT_ASSERT_MSG(current_pop.get_individuals().size() == config.population_size, diff --git a/include/blt/gp/stack.h b/include/blt/gp/stack.h index a83457c..e51149d 100644 --- a/include/blt/gp/stack.h +++ b/include/blt/gp/stack.h @@ -37,53 +37,40 @@ namespace blt::gp namespace detail { BLT_META_MAKE_FUNCTION_CHECK(drop); - // BLT_META_MAKE_FUNCTION_CHECK(drop_ephemeral); } + /** + * @brief This is the primary class that enables a type-erased GP system without compromising on performance. + * + * This class provides an efficient way to allocate, deallocate, and manage memory blocks + * in a stack-like structure. It supports operations like memory alignment, copying, moving, + * insertion, and removal of memory. This is particularly useful for performance-critical + * systems requiring temporary memory management without frequent heap allocation overhead. + * + * Types placed within this container cannot have an alignment greater than `BLT_GP_MAX_ALIGNMENT` bytes, doing so will result in unaligned pointer access. + * You can configure this by setting `BLT_GP_MAX_ALIGNMENT` as a compiler definition but be aware it will increase memory requirements. + * Setting `BLT_GP_MAX_ALIGNMENT` to lower than 8 is UB on x86-64 systems. + * Consequently, all types have a minimum storage size of `BLT_GP_MAX_ALIGNMENT` (8) bytes, meaning a char, float, int, etc. 
will take `BLT_GP_MAX_ALIGNMENT` bytes + */ class stack_allocator { constexpr static size_t PAGE_SIZE = 0x100; - template - using NO_REF_T = std::remove_cv_t>; using Allocator = aligned_allocator; - // todo remove this once i fix all the broken references - struct detail + static constexpr size_t align_bytes(const size_t size) noexcept { - static constexpr size_t aligned_size(const size_t size) noexcept - { - return (size + (gp::detail::MAX_ALIGNMENT - 1)) & ~(gp::detail::MAX_ALIGNMENT - 1); - } - }; + return (size + (detail::MAX_ALIGNMENT - 1)) & ~(detail::MAX_ALIGNMENT - 1); + } public: static Allocator& get_allocator(); - struct size_data_t - { - blt::size_t total_size_bytes = 0; - blt::size_t total_used_bytes = 0; - blt::size_t total_remaining_bytes = 0; - - friend std::ostream& operator<<(std::ostream& stream, const size_data_t& data) - { - stream << "["; - stream << data.total_used_bytes << " / " << data.total_size_bytes; - stream << " (" - << (data.total_size_bytes != 0 - ? (static_cast(data.total_used_bytes) / static_cast(data.total_size_bytes) * - 100) - : 0) << "%); space left: " << data.total_remaining_bytes << "]"; - return stream; - } - }; - template static constexpr size_t aligned_size() noexcept { - const auto bytes = detail::aligned_size(sizeof(NO_REF_T)); - if constexpr (blt::gp::detail::has_func_drop_v>) - return bytes + detail::aligned_size(sizeof(std::atomic_uint64_t*)); + const auto bytes = align_bytes(sizeof(std::decay_t)); + if constexpr (blt::gp::detail::has_func_drop_v>) + return bytes + align_bytes(sizeof(std::atomic_uint64_t*)); return bytes; } @@ -166,26 +153,28 @@ namespace blt::gp std::memcpy(data, data_ + (bytes_stored - bytes), bytes); } - template > + template void push(const T& t) { - static_assert(std::is_trivially_copyable_v, "Type must be bitwise copyable!"); - static_assert(alignof(NO_REF) <= gp::detail::MAX_ALIGNMENT, "Type alignment must not be greater than the max alignment!"); - const auto ptr = 
static_cast(allocate_bytes_for_size(aligned_size())); - std::memcpy(ptr, &t, sizeof(NO_REF)); + using DecayedT = std::decay_t; + static_assert(std::is_trivially_copyable_v, "Type must be bitwise copyable!"); + static_assert(alignof(DecayedT) <= detail::MAX_ALIGNMENT, "Type alignment must not be greater than the max alignment!"); + const auto ptr = static_cast(allocate_bytes_for_size(aligned_size())); + std::memcpy(ptr, &t, sizeof(DecayedT)); - if constexpr (gp::detail::has_func_drop_v>) + if constexpr (gp::detail::has_func_drop_v>) { - new(ptr + sizeof(NO_REF)) mem::pointer_storage{nullptr}; + new(ptr + sizeof(DecayedT)) mem::pointer_storage{nullptr}; } } - template > + template T pop() { - static_assert(std::is_trivially_copyable_v, "Type must be bitwise copyable!"); - static_assert(alignof(NO_REF) <= gp::detail::MAX_ALIGNMENT, "Type alignment must not be greater than the max alignment!"); - constexpr auto size = aligned_size(); + using DecayedT = std::decay_t; + static_assert(std::is_trivially_copyable_v, "Type must be bitwise copyable!"); + static_assert(alignof(DecayedT) <= detail::MAX_ALIGNMENT, "Type alignment must not be greater than the max alignment!"); + constexpr auto size = aligned_size(); #if BLT_DEBUG_LEVEL > 0 if (bytes_stored < size) throw std::runtime_error(("Not enough bytes left to pop!" 
__FILE__ ":") + std::to_string(__LINE__)); @@ -205,28 +194,34 @@ namespace blt::gp return data_ + (bytes_stored - bytes); } - template > + template T& from(const size_t bytes) const { - static_assert(std::is_trivially_copyable_v && "Type must be bitwise copyable!"); - static_assert(alignof(NO_REF) <= gp::detail::MAX_ALIGNMENT && "Type alignment must not be greater than the max alignment!"); - return *reinterpret_cast(from(aligned_size() + bytes)); + using DecayedT = std::decay_t; + static_assert(std::is_trivially_copyable_v && "Type must be bitwise copyable!"); + static_assert(alignof(DecayedT) <= detail::MAX_ALIGNMENT && "Type alignment must not be greater than the max alignment!"); + return *reinterpret_cast(from(aligned_size() + bytes)); } [[nodiscard]] std::pair&> access_pointer(const size_t bytes, const size_t type_size) const { const auto type_ref = from(bytes); - return {type_ref, *std::launder( - reinterpret_cast*>(type_ref + (type_size - detail::aligned_size( - sizeof(std::atomic_uint64_t*)))))}; + return { + type_ref, *std::launder( + reinterpret_cast*>(type_ref + (type_size - align_bytes( + sizeof(std::atomic_uint64_t*))))) + }; } - [[nodiscard]] std::pair&> access_pointer_forward(const size_t bytes, const size_t type_size) const + [[nodiscard]] std::pair&> access_pointer_forward( + const size_t bytes, const size_t type_size) const { const auto type_ref = data_ + bytes; - return {type_ref, *std::launder( - reinterpret_cast*>(type_ref + (type_size - detail::aligned_size( - sizeof(std::atomic_uint64_t*)))))}; + return { + type_ref, *std::launder( + reinterpret_cast*>(type_ref + (type_size - align_bytes( + sizeof(std::atomic_uint64_t*))))) + }; } template @@ -236,7 +231,7 @@ namespace blt::gp return { type_ref, *std::launder( reinterpret_cast*>(reinterpret_cast(&type_ref) + - detail::aligned_size(sizeof(T)))) + align_bytes(sizeof(T)))) }; } @@ -264,41 +259,19 @@ namespace blt::gp pop_bytes(aligned_bytes); } - // template - // void call_destructors() - // { 
- // if constexpr (sizeof...(Args) > 0) - // { - // size_t offset = (aligned_size>() + ...) - aligned_size::First>>(); - // ((call_drop(offset + (gp::detail::has_func_drop_v ? sizeof(u64*) : 0)), offset -= aligned_size>()), ...); - // (void) offset; - // } - // } - [[nodiscard]] bool empty() const noexcept { return bytes_stored == 0; } - [[nodiscard]] ptrdiff_t remaining_bytes_in_block() const noexcept + [[nodiscard]] ptrdiff_t remainder() const noexcept { return static_cast(size_ - bytes_stored); } - [[nodiscard]] ptrdiff_t bytes_in_head() const noexcept + [[nodiscard]] size_t stored() const noexcept { - return static_cast(bytes_stored); - } - - [[nodiscard]] size_data_t size() const noexcept - { - size_data_t data; - - data.total_used_bytes = bytes_stored; - data.total_size_bytes = size_; - data.total_remaining_bytes = remaining_bytes_in_block(); - - return data; + return bytes_stored; } void reserve(const size_t bytes) @@ -307,12 +280,13 @@ namespace blt::gp expand_raw(bytes); } - [[nodiscard]] size_t stored() const + void resize(const size_t bytes) { - return bytes_stored; + reserve(bytes); + bytes_stored = bytes; } - [[nodiscard]] size_t internal_storage_size() const + [[nodiscard]] size_t capacity() const { return size_; } @@ -322,6 +296,11 @@ namespace blt::gp bytes_stored = 0; } + [[nodiscard]] auto* data() const + { + return data_; + } + private: void expand(const size_t bytes) { @@ -350,7 +329,7 @@ namespace blt::gp { if (data_ == nullptr) return nullptr; - size_t remaining_bytes = remaining_bytes_in_block(); + size_t remaining_bytes = remainder(); auto* pointer = static_cast(data_ + bytes_stored); return std::align(gp::detail::MAX_ALIGNMENT, bytes, pointer, remaining_bytes); } @@ -372,15 +351,6 @@ namespace blt::gp return aligned_ptr; } - // template - // void call_drop(const size_t offset) - // { - // if constexpr (blt::gp::detail::has_func_drop_v) - // { - // from>(offset).drop(); - // } - // } - u8* data_ = nullptr; // place in the data_ array which 
has a free spot. size_t bytes_stored = 0; diff --git a/include/blt/gp/sync.h b/include/blt/gp/sync.h new file mode 100644 index 0000000..1ef3501 --- /dev/null +++ b/include/blt/gp/sync.h @@ -0,0 +1,88 @@ +#pragma once +/* + * Copyright (C) 2024 Brett Terpstra + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef BLT_GP_SYNC_H +#define BLT_GP_SYNC_H + +#include +#include + +namespace blt::gp +{ + class sync_t + { + public: + explicit sync_t(gp_program& program, fs::writer_t& writer); + + virtual void trigger(u64 current_time) const; + + sync_t& with_timer(u64 seconds) + { + m_timer_seconds = seconds; + return *this; + } + + sync_t& every_generations(u64 generations) + { + m_generations = generations; + return *this; + } + + sync_t& overwrite_file_on_write() + { + m_reset_to_start_of_file = true; + return *this; + } + + sync_t& append_to_file_on_write() + { + m_reset_to_start_of_file = false; + return *this; + } + + /** + * Save the state of the whole program instead of just the generation information. + */ + sync_t& whole_program() + { + m_whole_program = true; + return *this; + } + + /** + * Only save the current generation to disk. 
+ */ + sync_t& generation_only() + { + m_whole_program = false; + return *this; + } + + ~sync_t(); + + private: + gp_program* m_program; + fs::writer_t* m_writer; + std::optional m_timer_seconds; + std::optional m_generations; + bool m_reset_to_start_of_file = false; + bool m_whole_program = false; + }; +} + +#endif //BLT_GP_SYNC_H diff --git a/include/blt/gp/threading.h b/include/blt/gp/threading.h index 263a896..00c91b1 100644 --- a/include/blt/gp/threading.h +++ b/include/blt/gp/threading.h @@ -97,9 +97,9 @@ namespace blt::gp task_builder_t() = default; template - static std::function make_callable(Tasks&&... tasks) + static std::function make_callable(Tasks&&... tasks) { - return [&tasks...](barrier& sync_barrier, EnumId task, size_t thread_index) + return [&tasks...](barrier_t& sync_barrier, EnumId task, size_t thread_index) { call_jmp_table(sync_barrier, task, thread_index, tasks...); }; @@ -107,7 +107,7 @@ namespace blt::gp private: template - static void execute(barrier& sync_barrier, const size_t thread_index, Task&& task) + static void execute(barrier_t& sync_barrier, const size_t thread_index, Task&& task) { // sync_barrier.wait(); if (task.requires_single_sync) @@ -121,7 +121,7 @@ namespace blt::gp } template - static bool call(barrier& sync_barrier, const EnumId current_task, const size_t thread_index, Task&& task) + static bool call(barrier_t& sync_barrier, const EnumId current_task, const size_t thread_index, Task&& task) { if (static_cast(current_task) == static_cast(task.get_task_id())) { @@ -132,7 +132,7 @@ namespace blt::gp } template - static void call_jmp_table(barrier& sync_barrier, const EnumId current_task, const size_t thread_index, Tasks&&... tasks) + static void call_jmp_table(barrier_t& sync_barrier, const EnumId current_task, const size_t thread_index, Tasks&&... 
tasks) { if (static_cast(current_task) >= sizeof...(tasks)) BLT_UNREACHABLE; @@ -146,7 +146,7 @@ namespace blt::gp static_assert(std::is_enum_v, "Enum ID must be of enum type!"); public: - explicit thread_manager_t(const size_t thread_count, std::function task_func, + explicit thread_manager_t(const size_t thread_count, std::function task_func, const bool will_main_block = true): barrier(thread_count), will_main_block(will_main_block) { thread_callable = [this, task_func = std::move(task_func)](const size_t thread_index) @@ -226,7 +226,7 @@ namespace blt::gp return will_main_block ? threads.size() + 1 : threads.size(); } - blt::barrier barrier; + blt::barrier_t barrier; std::atomic_bool should_run = true; bool will_main_block; std::vector tasks; diff --git a/include/blt/gp/transformers.h b/include/blt/gp/transformers.h index 2da1aed..f18d696 100644 --- a/include/blt/gp/transformers.h +++ b/include/blt/gp/transformers.h @@ -64,12 +64,6 @@ namespace blt::gp operator_info_t& type_operator_info; }; - struct crossover_point_t - { - tree_t::subtree_point_t p1_crossover_point; - tree_t::subtree_point_t p2_crossover_point; - }; - struct config_t { // number of times crossover will try to pick a valid point in the tree. this is purely based on the return type of the operators @@ -88,22 +82,60 @@ namespace blt::gp f32 terminal_chance = 0.1; // use traversal to select point instead of random selection bool traverse = false; - }; - crossover_t() = default; + BLT_MAKE_SETTER_LVALUE(u32, max_crossover_tries); + BLT_MAKE_SETTER_LVALUE(u32, max_crossover_iterations); + BLT_MAKE_SETTER_LVALUE(u32, min_tree_size); + BLT_MAKE_SETTER_LVALUE(f32, depth_multiplier); + BLT_MAKE_SETTER_LVALUE(f32, terminal_chance); + BLT_MAKE_SETTER_LVALUE(bool, traverse); + }; explicit crossover_t(const config_t& config): config(config) { } + /** + * Apply crossover to a set of parents. Note: c1 and c2 are already filled with their respective parent's elements. 
+ * @return true if the crossover succeeded, otherwise returning false will erase progress. + */ + virtual bool apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) = 0; + [[nodiscard]] const config_t& get_config() const { return config; } - std::optional get_crossover_point(const tree_t& c1, const tree_t& c2) const; + virtual ~crossover_t() = default; - std::optional get_crossover_point_traverse(const tree_t& c1, const tree_t& c2) const; + protected: + config_t config; + }; + + /** + * Base class for crossover which performs basic subtree crossover on two random nodes in the parent tree + */ + class subtree_crossover_t : public crossover_t + { + public: + struct crossover_point_t + { + tree_t::subtree_point_t p1_crossover_point; + tree_t::subtree_point_t p2_crossover_point; + }; + + + subtree_crossover_t(): crossover_t(config_t{}) + { + } + + explicit subtree_crossover_t(const config_t& config): crossover_t(config) + { + } + + [[nodiscard]] std::optional get_crossover_point(const tree_t& c1, const tree_t& c2) const; + + [[nodiscard]] std::optional get_crossover_point_traverse(const tree_t& c1, const tree_t& c2) const; /** * child1 and child2 are copies of the parents, the result of selecting a crossover point and performing standard subtree crossover. 
@@ -111,16 +143,39 @@ namespace blt::gp * @param program reference to the global program container responsible for managing these trees * @param p1 reference to the first parent * @param p2 reference to the second parent - * @return expected pair of child otherwise returns error enum + * @param c1 reference to output child 1 + * @param c2 reference to output child 2 + * @return true if function succeeded, otherwise false */ - virtual bool apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2); // NOLINT + virtual bool apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) override; // NOLINT - virtual ~crossover_t() = default; + ~subtree_crossover_t() override = default; protected: [[nodiscard]] std::optional get_point_traverse_retry(const tree_t& t, std::optional type) const; + }; - config_t config; + class one_point_crossover_t : public crossover_t + { + public: + one_point_crossover_t(): crossover_t(config_t{}) + { + } + + explicit one_point_crossover_t(const config_t& config): crossover_t(config) + { + } + + bool apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) override; + }; + + class advanced_crossover_t : public crossover_t + { + advanced_crossover_t(): crossover_t(config_t{}) + { + } + public: + bool apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) override; }; class mutation_t diff --git a/include/blt/gp/tree.h b/include/blt/gp/tree.h index f25fba0..136370b 100644 --- a/include/blt/gp/tree.h +++ b/include/blt/gp/tree.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -87,6 +88,8 @@ namespace blt::gp return m_flags; } + friend bool operator==(const op_container_t& a, const op_container_t& b); + private: size_t m_type_size; operator_id m_id; @@ -185,18 +188,13 @@ namespace blt::gp class tree_t { public: - struct child_t - { - ptrdiff_t start; - // one past the end - ptrdiff_t end; - }; - struct 
subtree_point_t { ptrdiff_t pos; type_id type; + subtree_point_t() = default; + explicit subtree_point_t(const ptrdiff_t pos): pos(pos), type(0) { } @@ -206,6 +204,15 @@ namespace blt::gp } }; + struct child_t + { + ptrdiff_t start; + // one past the end + ptrdiff_t end; + + + }; + struct byte_only_transaction_t { byte_only_transaction_t(tree_t& tree, const size_t bytes): tree(tree), data(nullptr), bytes(bytes) @@ -411,6 +418,13 @@ namespace blt::gp copy_subtree(point, find_endpoint(point.pos), out_tree); } + void copy_subtree(const child_t subtree, tree_t& out_tree) + { + copy_subtree(subtree_point_t{subtree.start}, subtree.end, out_tree); + } + + void swap_subtrees(child_t our_subtree, tree_t& other_tree, child_t other_subtree); + /** * Swaps the subtrees between this tree and the other tree * @param our_subtree @@ -457,6 +471,11 @@ namespace blt::gp delete_subtree(point, find_endpoint(point.pos)); } + void delete_subtree(const child_t subtree) + { + delete_subtree(subtree_point_t{subtree.start}, subtree.end); + } + /** * Insert a subtree before the specified point * @param point point to insert into @@ -583,7 +602,8 @@ namespace blt::gp template static auto make_execution_lambda(size_t call_reserve_size, Operators&... 
operators) { - return [call_reserve_size, &operators...](const tree_t& tree, void* context) -> evaluation_context& { + return [call_reserve_size, &operators...](const tree_t& tree, void* context) -> evaluation_context& + { const auto& ops = tree.operations; const auto& vals = tree.values; @@ -608,6 +628,16 @@ namespace blt::gp }; } + [[nodiscard]] size_t required_size() const; + + void to_byte_array(std::byte* out) const; + + void to_file(fs::writer_t& file) const; + + void from_byte_array(const std::byte* in); + + void from_file(fs::reader_t& file); + ~tree_t() { clear(*m_program); @@ -615,6 +645,13 @@ namespace blt::gp static tree_t& get_thread_local(gp_program& program); + friend bool operator==(const tree_t& a, const tree_t& b); + + friend bool operator!=(const tree_t& a, const tree_t& b) + { + return !(a == b); + } + private: void handle_operator_inserted(const op_container_t& op); @@ -766,6 +803,13 @@ namespace blt::gp individual_t& operator=(const individual_t&) = delete; individual_t& operator=(individual_t&&) = default; + + friend bool operator==(const individual_t& a, const individual_t& b); + + friend bool operator!=(const individual_t& a, const individual_t& b) + { + return !(a == b); + } }; class population_t diff --git a/include/blt/gp/util/statistics.h b/include/blt/gp/util/statistics.h index bf79733..93aed06 100644 --- a/include/blt/gp/util/statistics.h +++ b/include/blt/gp/util/statistics.h @@ -196,6 +196,20 @@ namespace blt::gp worst_fitness = std::numeric_limits::max(); normalized_fitness.clear(); } + + friend bool operator==(const population_stats& a, const population_stats& b) + { + return a.overall_fitness.load(std::memory_order_relaxed) == b.overall_fitness.load(std::memory_order_relaxed) && + a.average_fitness.load(std::memory_order_relaxed) == b.average_fitness.load(std::memory_order_relaxed) && + a.best_fitness.load(std::memory_order_relaxed) == b.best_fitness.load(std::memory_order_relaxed) && + 
a.worst_fitness.load(std::memory_order_relaxed) == b.worst_fitness.load(std::memory_order_relaxed) && + a.normalized_fitness == b.normalized_fitness; + } + + friend bool operator!=(const population_stats& a, const population_stats& b) + { + return !(a == b); + } }; } diff --git a/src/program.cpp b/src/program.cpp index 1cee290..c3bcf88 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -17,6 +17,13 @@ */ #include #include +#include + +#ifndef BLT_ASSERT_RET +#define BLT_ASSERT_RET(expr) if (!(expr)) { return false; } +#endif + +#define BLT_READ(read_statement, size) do { auto read = read_statement; if (read != size) { return blt::gp::errors::serialization::invalid_read_t{read, size}; } } while (false) namespace blt::gp { @@ -24,25 +31,28 @@ namespace blt::gp // this is largely to not break the tests :3 // it's also to allow for quick setup of a gp program if you don't care how crossover or mutation is handled static advanced_mutation_t s_mutator; - static crossover_t s_crossover; + static subtree_crossover_t s_crossover; + // static one_point_crossover_t s_crossover; static ramped_half_initializer_t s_init; prog_config_t::prog_config_t(): mutator(s_mutator), crossover(s_crossover), pop_initializer(s_init) { - } prog_config_t::prog_config_t(const std::reference_wrapper& popInitializer): - mutator(s_mutator), crossover(s_crossover), pop_initializer(popInitializer) - {} + mutator(s_mutator), crossover(s_crossover), pop_initializer(popInitializer) + { + } prog_config_t::prog_config_t(size_t populationSize, const std::reference_wrapper& popInitializer): - population_size(populationSize), mutator(s_mutator), crossover(s_crossover), pop_initializer(popInitializer) - {} + population_size(populationSize), mutator(s_mutator), crossover(s_crossover), pop_initializer(popInitializer) + { + } prog_config_t::prog_config_t(size_t populationSize): - population_size(populationSize), mutator(s_mutator), crossover(s_crossover), pop_initializer(s_init) - {} + 
population_size(populationSize), mutator(s_mutator), crossover(s_crossover), pop_initializer(s_init) + { + } random_t& gp_program::get_random() const { @@ -56,6 +66,171 @@ namespace blt::gp return allocator; } + void gp_program::save_generation(fs::writer_t& writer) + { + const auto individuals = current_pop.get_individuals().size(); + writer.write(&individuals, sizeof(individuals)); + for (const auto& individual : current_pop.get_individuals()) + { + writer.write(&individual.fitness, sizeof(individual.fitness)); + individual.tree.to_file(writer); + } + } + + bool gp_program::load_generation(fs::reader_t& reader) + { + size_t individuals; + BLT_ASSERT_RET(reader.read(&individuals, sizeof(individuals)) == sizeof(individuals)); + if (current_pop.get_individuals().size() != individuals) + { + for (size_t i = current_pop.get_individuals().size(); i < individuals; i++) + current_pop.get_individuals().emplace_back(tree_t{*this}); + } + for (auto& individual : current_pop.get_individuals()) + { + BLT_ASSERT_RET(reader.read(&individual.fitness, sizeof(individual.fitness)) == sizeof(individual.fitness)); + individual.tree.clear(*this); + individual.tree.from_file(reader); + } + return true; + } + + void write_stat(fs::writer_t& writer, const population_stats& stat) + { + const auto overall_fitness = stat.overall_fitness.load(); + const auto average_fitness = stat.average_fitness.load(); + const auto best_fitness = stat.best_fitness.load(); + const auto worst_fitness = stat.worst_fitness.load(); + writer.write(&overall_fitness, sizeof(overall_fitness)); + writer.write(&average_fitness, sizeof(average_fitness)); + writer.write(&best_fitness, sizeof(best_fitness)); + writer.write(&worst_fitness, sizeof(worst_fitness)); + const size_t fitness_count = stat.normalized_fitness.size(); + writer.write(&fitness_count, sizeof(fitness_count)); + for (const auto& fitness : stat.normalized_fitness) + writer.write(&fitness, sizeof(fitness)); + } + + bool load_stat(fs::reader_t& reader, 
population_stats& stat) + { + BLT_ASSERT_RET(reader.read(&stat.overall_fitness, sizeof(stat.overall_fitness)) == sizeof(stat.overall_fitness)); + BLT_ASSERT_RET(reader.read(&stat.average_fitness, sizeof(stat.average_fitness)) == sizeof(stat.average_fitness)); + BLT_ASSERT_RET(reader.read(&stat.best_fitness, sizeof(stat.best_fitness)) == sizeof(stat.best_fitness)); + BLT_ASSERT_RET(reader.read(&stat.worst_fitness, sizeof(stat.worst_fitness)) == sizeof(stat.worst_fitness)); + size_t fitness_count; + BLT_ASSERT_RET(reader.read(&fitness_count, sizeof(fitness_count)) == sizeof(size_t)); + stat.normalized_fitness.resize(fitness_count); + for (auto& fitness : stat.normalized_fitness) + BLT_ASSERT_RET(reader.read(&fitness, sizeof(fitness)) == sizeof(fitness)); + return true; + } + + void gp_program::save_state(fs::writer_t& writer) + { + const size_t operator_count = storage.operators.size(); + writer.write(&operator_count, sizeof(operator_count)); + for (const auto& [i, op] : enumerate(storage.operators)) + { + writer.write(&i, sizeof(i)); + bool has_name = storage.names[i].has_value(); + writer.write(&has_name, sizeof(has_name)); + if (has_name) + { + auto size = storage.names[i]->size(); + writer.write(&size, sizeof(size)); + writer.write(storage.names[i]->data(), size); + } + writer.write(&storage.operator_metadata[i].arg_size_bytes, sizeof(storage.operator_metadata[i].arg_size_bytes)); + writer.write(&storage.operator_metadata[i].return_size_bytes, sizeof(storage.operator_metadata[i].return_size_bytes)); + writer.write(&op.argc, sizeof(op.argc)); + writer.write(&op.return_type, sizeof(op.return_type)); + const size_t argc_type_count = op.argument_types.size(); + writer.write(&argc_type_count, sizeof(argc_type_count)); + for (const auto argument : op.argument_types) + writer.write(&argument, sizeof(argument)); + } + const size_t history_count = statistic_history.size(); + writer.write(&history_count, sizeof(history_count)); + for (const auto& stat : statistic_history) 
+ write_stat(writer, stat); + write_stat(writer, current_stats); + save_generation(writer); + } + + std::optional gp_program::load_state(fs::reader_t& reader) + { + size_t operator_count; + BLT_READ(reader.read(&operator_count, sizeof(operator_count)), sizeof(operator_count)); + if (operator_count != storage.operators.size()) + return errors::serialization::unexpected_size_t{operator_count, storage.operators.size()}; + for (size_t i = 0; i < operator_count; i++) + { + size_t expected_i; + BLT_READ(reader.read(&expected_i, sizeof(expected_i)), sizeof(expected_i)); + if (expected_i != i) + return errors::serialization::invalid_operator_id_t{i, expected_i}; + bool has_name; + BLT_READ(reader.read(&has_name, sizeof(has_name)), sizeof(has_name)); + if (has_name) + { + size_t size; + BLT_READ(reader.read(&size, sizeof(size)), sizeof(size)); + std::string name; + name.resize(size); + BLT_READ(reader.read(name.data(), size), static_cast(size)); + if (!storage.names[i].has_value()) + return errors::serialization::invalid_name_t{i, name, "NO NAME"}; + if (name != *storage.names[i]) + return errors::serialization::invalid_name_t{i, name, std::string{*storage.names[i]}}; + const auto& op = storage.operators[i]; + const auto& op_meta = storage.operator_metadata[i]; + + decltype(std::declval().arg_size_bytes) arg_size_bytes; + decltype(std::declval().return_size_bytes) return_size_bytes; + BLT_READ(reader.read(&arg_size_bytes, sizeof(arg_size_bytes)), sizeof(arg_size_bytes)); + BLT_READ(reader.read(&return_size_bytes, sizeof(return_size_bytes)), sizeof(return_size_bytes)); + + if (op_meta.arg_size_bytes != arg_size_bytes) + return errors::serialization::mismatched_bytes_t{i, arg_size_bytes, op_meta.arg_size_bytes}; + + if (op_meta.return_size_bytes != return_size_bytes) + return errors::serialization::mismatched_bytes_t{i, return_size_bytes, op_meta.return_size_bytes}; + + argc_t argc; + BLT_READ(reader.read(&argc, sizeof(argc)), sizeof(argc)); + if (argc.argc != op.argc.argc) + 
return errors::serialization::mismatched_argc_t{i, argc.argc, op.argc.argc}; + if (argc.argc_context != op.argc.argc_context) + return errors::serialization::mismatched_argc_t{i, argc.argc_context, op.argc.argc_context}; + + type_id return_type; + BLT_READ(reader.read(&return_type, sizeof(return_type)), sizeof(return_type)); + if (return_type != op.return_type) + return errors::serialization::mismatched_return_type_t{i, return_type, op.return_type}; + size_t arg_type_count; + BLT_READ(reader.read(&arg_type_count, sizeof(arg_type_count)), sizeof(return_type)); + if (arg_type_count != op.argument_types.size()) + return errors::serialization::unexpected_size_t{arg_type_count, op.argument_types.size()}; + for (size_t j = 0; j < arg_type_count; j++) + { + type_id type; + BLT_READ(reader.read(&type, sizeof(type)), sizeof(type)); + if (type != op.argument_types[j]) + return errors::serialization::mismatched_arg_type_t{i, j, type, op.argument_types[j]}; + } + } + } + size_t history_count; + BLT_READ(reader.read(&history_count, sizeof(history_count)), sizeof(history_count)); + statistic_history.resize(history_count); + for (size_t i = 0; i < history_count; i++) + load_stat(reader, statistic_history[i]); + load_stat(reader, current_stats); + load_generation(reader); + + return {}; + } + void gp_program::create_threads() { #ifdef BLT_TRACK_ALLOCATIONS @@ -67,7 +242,8 @@ namespace blt::gp // main thread is thread0 for (blt::size_t i = 1; i < config.threads; i++) { - thread_helper.threads.emplace_back(new std::thread([i, this]() { + thread_helper.threads.emplace_back(new std::thread([i, this]() + { #ifdef BLT_TRACK_ALLOCATIONS tracker.reserve(); tracker.await_thread_loading_complete(config.threads); @@ -95,4 +271,4 @@ namespace blt::gp tracker.await_thread_loading_complete(config.threads); #endif } -} \ No newline at end of file +} diff --git a/src/sync.cpp b/src/sync.cpp new file mode 100644 index 0000000..7519907 --- /dev/null +++ b/src/sync.cpp @@ -0,0 +1,106 @@ +/* + * + * 
Copyright (C) 2025 Brett Terpstra + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include +#include + +namespace blt::gp +{ + struct global_sync_state_t + { + std::vector syncs; + std::mutex mutex; + std::thread* thread = nullptr; + std::atomic_bool should_run = true; + std::condition_variable condition_variable; + + void add(sync_t* sync) + { + if (thread == nullptr) + { + thread = new std::thread([this]() + { + while (should_run) + { + std::unique_lock lock(mutex); + condition_variable.wait_for(lock, std::chrono::milliseconds(100)); + const auto current_time = system::getCurrentTimeMilliseconds(); + for (const auto& sync : syncs) + sync->trigger(current_time); + } + }); + } + std::scoped_lock lock(mutex); + syncs.push_back(sync); + } + + void remove(const sync_t* sync) + { + if (thread == nullptr) + { + BLT_WARN("Tried to remove sync from global sync state, but no thread was running"); + return; + } + std::unique_lock lock(mutex); + const auto iter = std::find(syncs.begin(), syncs.end(), sync); + std::iter_swap(iter, syncs.end() - 1); + syncs.pop_back(); + if (syncs.empty()) + { + lock.unlock(); + should_run = false; + condition_variable.notify_all(); + thread->join(); + delete thread; + thread = nullptr; + } + } + }; + + global_sync_state_t& get_state() + { + static global_sync_state_t state; + return state; + } + + sync_t::sync_t(gp_program& program, 
fs::writer_t& writer): m_program(&program), m_writer(&writer) + { + get_state().add(this); + } + + void sync_t::trigger(const u64 current_time) const + { + if ((m_timer_seconds && (current_time % *m_timer_seconds == 0)) || (m_generations && (m_program->get_current_generation() % *m_generations == + 0))) + { + if (m_reset_to_start_of_file) + m_writer->seek(0, fs::writer_t::seek_origin::seek_set); + if (m_whole_program) + m_program->save_state(*m_writer); + else + m_program->save_generation(*m_writer); + } + } + + sync_t::~sync_t() + { + get_state().remove(this); + } +} diff --git a/src/transformers.cpp b/src/transformers.cpp index b7bf704..24ff404 100644 --- a/src/transformers.cpp +++ b/src/transformers.cpp @@ -65,7 +65,7 @@ namespace blt::gp { } - bool crossover_t::apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) // NOLINT + bool subtree_crossover_t::apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) // NOLINT { if (p1.size() < config.min_tree_size || p2.size() < config.min_tree_size) return false; @@ -80,13 +80,318 @@ namespace blt::gp if (!point) return false; + c1.swap_subtrees(point->p1_crossover_point, c2, point->p2_crossover_point); + +#if BLT_DEBUG_LEVEL >= 2 + if (!c1.check(detail::debug::context_ptr) || !c2.check(detail::debug::context_ptr)) + throw std::runtime_error("Tree check failed"); +#endif + + return true; + } + + std::optional subtree_crossover_t::get_crossover_point(const tree_t& c1, + const tree_t& c2) const + { + const auto first = c1.select_subtree(config.terminal_chance); + const auto second = c2.select_subtree(first.type, config.max_crossover_tries, config.terminal_chance); + + if (!second) + return {}; + + return {{first, *second}}; + } + + std::optional subtree_crossover_t::get_crossover_point_traverse(const tree_t& c1, + const tree_t& c2) const + { + auto c1_point_o = get_point_traverse_retry(c1, {}); + if (!c1_point_o) + return {}; + const auto c2_point_o = 
get_point_traverse_retry(c2, c1_point_o->type); + if (!c2_point_o) + return {}; + return {{*c1_point_o, *c2_point_o}}; + } + + std::optional subtree_crossover_t::get_point_traverse_retry(const tree_t& t, const std::optional type) const + { + if (type) + return t.select_subtree_traverse(*type, config.max_crossover_tries, config.terminal_chance, config.depth_multiplier); + return t.select_subtree_traverse(config.terminal_chance, config.depth_multiplier); + } + + bool one_point_crossover_t::apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) + { + // if (p1.size() < config.min_tree_size || p2.size() < config.min_tree_size) + // return false; + + tree_t::subtree_point_t point1, point2; // NOLINT + if (config.traverse) + { + point1 = p1.select_subtree_traverse(config.terminal_chance, config.depth_multiplier); + if (const auto val = p2.select_subtree_traverse(point1.type, config.max_crossover_tries, config.terminal_chance, config.depth_multiplier)) + point2 = *val; + else + return false; + } else + { + point1 = p1.select_subtree(config.terminal_chance); + if (const auto val = p2.select_subtree(point1.type, config.max_crossover_tries, config.terminal_chance)) + point2 = *val; + else + return false; + } + + const auto& p1_operator = p1.get_operator(point1.pos); + const auto& p2_operator = p2.get_operator(point2.pos); + + const auto& p1_info = program.get_operator_info(p1_operator.id()); + const auto& p2_info = program.get_operator_info(p2_operator.id()); + + struct reorder_index_t + { + size_t index1; + size_t index2; + }; + + struct swap_index_t + { + size_t p1_index; + size_t p2_index; + }; + + thread_local struct type_resolver_t + { + tracked_vector children_data_p1; + tracked_vector children_data_p2; + hashmap_t> missing_p1_types; + hashmap_t> missing_p2_types; + hashset_t correct_types; + hashset_t p1_correct_types; + hashset_t p2_correct_types; + std::vector p1_reorder_types; + std::vector p2_reorder_types; + std::vector swap_types; 
+ std::vector temp_trees; + + void print_missing_types() + { + for (const auto& [id, v] : missing_p1_types) + { + if (!v.empty()) + { + BLT_INFO("(P1) For type {} missing indexes:", id); + for (const auto idx : v) + BLT_INFO("\t{}", idx); + BLT_INFO("----"); + } + } + for (const auto& [id, v] : missing_p2_types) + { + if (!v.empty()) + { + BLT_INFO("(P2) For type {} missing indexes:", id); + for (const auto idx : v) + BLT_INFO("\t{}", idx); + BLT_INFO("----"); + } + } + } + + std::optional get_p1_index(const type_id& id) + { + if (!missing_p1_types.contains(id)) + return {}; + if (missing_p1_types[id].empty()) + return {}; + auto idx = missing_p1_types[id].back(); + missing_p1_types[id].pop_back(); + return idx; + } + + std::optional get_p2_index(const type_id& id) + { + if (!missing_p2_types.contains(id)) + return {}; + if (missing_p2_types[id].empty()) + return {}; + auto idx = missing_p2_types[id].back(); + missing_p2_types[id].pop_back(); + return idx; + } + + [[nodiscard]] bool handled_p1(const size_t index) const + { + return correct_types.contains(index) || p1_correct_types.contains(index); + } + + [[nodiscard]] bool handled_p2(const size_t index) const + { + return correct_types.contains(index) || p2_correct_types.contains(index); + } + + void clear(gp_program& program) + { + children_data_p1.clear(); + children_data_p2.clear(); + correct_types.clear(); + p1_correct_types.clear(); + p2_correct_types.clear(); + p1_reorder_types.clear(); + p2_reorder_types.clear(); + swap_types.clear(); + for (auto& tree : temp_trees) + tree.clear(program); + for (auto& [id, v] : missing_p1_types) + v.clear(); + for (auto& [id, v] : missing_p2_types) + v.clear(); + } + } resolver; + resolver.clear(program); + + auto min_size = std::min(p1_info.argument_types.size(), p2_info.argument_types.size()); + + // resolve type information + for (size_t i = 0; i < min_size; i++) + { + if (p1_info.argument_types[i] != p2_info.argument_types[i]) + { + 
resolver.missing_p1_types[p1_info.argument_types[i].id].push_back(i); + resolver.missing_p2_types[p2_info.argument_types[i].id].push_back(i); + } else + resolver.correct_types.insert(i); + } + + for (size_t i = min_size; i < p1_info.argument_types.size(); i++) + resolver.missing_p1_types[p1_info.argument_types[i].id].push_back(i); + + for (size_t i = min_size; i < p2_info.argument_types.size(); i++) + resolver.missing_p2_types[p2_info.argument_types[i].id].push_back(i); + + // if swaping p1 -> p2 and p2 -> p1, we may already have the types we need just in a different order + + // first, make a list of types which can simply be reordered + for (size_t i = 0; i < p1_info.argument_types.size(); i++) + { + if (resolver.correct_types.contains(i)) + continue; + if (auto index = resolver.get_p2_index(p1_info.argument_types[i].id)) + { + resolver.p2_reorder_types.push_back({i, *index}); + resolver.p2_correct_types.insert(i); + } + } + + BLT_DEBUG("Operator C1 {} expects types: ", p1_operator.id()); + for (const auto [i, type] : enumerate(p1_info.argument_types)) + BLT_TRACE("{} -> {}", i, type); + BLT_DEBUG("Operator C2 {} expects types: ", p2_operator.id()); + for (const auto [i, type] : enumerate(p2_info.argument_types)) + BLT_TRACE("{} -> {}", i, type); + resolver.print_missing_types(); + + for (size_t i = 0; i < p2_info.argument_types.size(); i++) + { + if (resolver.correct_types.contains(i)) + continue; + if (auto index = resolver.get_p1_index(p2_info.argument_types[i].id)) + { + resolver.p1_reorder_types.push_back({i, *index}); + resolver.p1_correct_types.insert(i); + } + } + + // next we need to figure out which types need to be swapped + for (size_t i = 0; i < p1_info.argument_types.size(); i++) + { + if (resolver.handled_p2(i)) + continue; + if (auto index = resolver.get_p1_index(p1_info.argument_types[i].id)) + resolver.swap_types.push_back({*index, i}); + } + + for (size_t i = 0; i < p2_info.argument_types.size(); i++) + { + if (resolver.handled_p1(i)) + 
continue; + if (auto index = resolver.get_p2_index(p2_info.argument_types[i].id)) + resolver.swap_types.push_back({i, *index}); + } + + // now we do the swap + p1.find_child_extends(resolver.children_data_p1, point1.pos, p1_info.argument_types.size()); + p2.find_child_extends(resolver.children_data_p2, point2.pos, p2_info.argument_types.size()); + + for (const auto& [index1, index2] : resolver.p1_reorder_types) + { + BLT_DEBUG("Reordering in C1: {} -> {}", index1, index2); + c1.swap_subtrees(resolver.children_data_p1[index1], c1, resolver.children_data_p1[index2]); + } + + for (const auto& [index1, index2] : resolver.p2_reorder_types) + { + BLT_DEBUG("Reordering in C2: {} -> {}", index1, index2); + c2.swap_subtrees(resolver.children_data_p2[index1], c2, resolver.children_data_p2[index2]); + } + + auto c1_insert = resolver.children_data_p1.back().end; + auto c2_insert = resolver.children_data_p2.back().end; + + for (const auto& [p1_index, p2_index] : resolver.swap_types) + { + if (p1_index < p1_info.argument_types.size() && p2_index < p2_info.argument_types.size()) + c1.swap_subtrees(resolver.children_data_p1[p1_index], c2, resolver.children_data_p2[p2_index]); + else if (p1_index < p1_info.argument_types.size() && p2_index >= p2_info.argument_types.size()) + { + BLT_TRACE("(P1 IS UNDER!) Trying to swap P1 {} for P2 {} (Sizes: P1: {} P2: {})", p1_index, p2_index, p1_info.argument_types.size(), p2_info.argument_types.size()); + BLT_TRACE("Inserting into P2 from P1!"); + c1.copy_subtree(resolver.children_data_p1[p1_index], resolver.temp_trees[0]); + c1.delete_subtree(resolver.children_data_p1[p1_index]); + c2_insert = c2.insert_subtree(tree_t::subtree_point_t{c1_insert}, resolver.temp_trees[0]); + } else if (p2_index < p2_info.argument_types.size() && p1_index >= p1_info.argument_types.size()) + { + BLT_TRACE("(P2 IS UNDER!) 
Trying to swap P1 {} for P2 {} (Sizes: P1: {} P2: {})", p1_index, p2_index, p1_info.argument_types.size(), p2_info.argument_types.size()); + } else + { + BLT_WARN("This should be an impossible state!"); + } + } + + + c1.modify_operator(point1.pos, p2_operator.id(), p2_info.return_type); + c2.modify_operator(point2.pos, p1_operator.id(), p1_info.return_type); + +#if BLT_DEBUG_LEVEL >= 2 + if (!c1.check(detail::debug::context_ptr) || !c2.check(detail::debug::context_ptr)) + throw std::runtime_error("Tree check failed"); +#endif + return true; + } + + bool advanced_crossover_t::apply(gp_program& program, const tree_t& p1, const tree_t& p2, tree_t& c1, tree_t& c2) + { + if (p1.size() < config.min_tree_size || p2.size() < config.min_tree_size) + return false; + // TODO: more crossover! switch (program.get_random().get_u32(0, 2)) { + // single point crossover (only if operators at this point are "compatible") case 0: + { + + // check if can work + // otherwise goto case2 + } + // Mating crossover analogs to same species breeding. 
Only works if tree is mostly similar case 1: - c1.swap_subtrees(point->p1_crossover_point, c2, point->p2_crossover_point); - break; + { + } + // Subtree crossover, select random points inside trees and swap their subtrees + case 2: + return subtree_crossover_t{}.apply(program, p1, p2, c1, c2); default: #if BLT_DEBUG_LEVEL > 0 BLT_ABORT("This place should be unreachable!"); @@ -99,39 +404,6 @@ namespace blt::gp if (!c1.check(detail::debug::context_ptr) || !c2.check(detail::debug::context_ptr)) throw std::runtime_error("Tree check failed"); #endif - - return true; - } - - std::optional crossover_t::get_crossover_point(const tree_t& c1, - const tree_t& c2) const - { - auto first = c1.select_subtree(config.terminal_chance); - auto second = c2.select_subtree(first.type, config.max_crossover_tries, config.terminal_chance); - - if (!second) - return {}; - - return {{first, *second}}; - } - - std::optional crossover_t::get_crossover_point_traverse(const tree_t& c1, - const tree_t& c2) const - { - auto c1_point_o = get_point_traverse_retry(c1, {}); - if (!c1_point_o) - return {}; - auto c2_point_o = get_point_traverse_retry(c2, c1_point_o->type); - if (!c2_point_o) - return {}; - return {{*c1_point_o, *c2_point_o}}; - } - - std::optional crossover_t::get_point_traverse_retry(const tree_t& t, const std::optional type) const - { - if (type) - return t.select_subtree_traverse(*type, config.max_crossover_tries, config.terminal_chance, config.depth_multiplier); - return t.select_subtree_traverse(config.terminal_chance, config.depth_multiplier); } bool mutation_t::apply(gp_program& program, const tree_t&, tree_t& c) diff --git a/src/tree.cpp b/src/tree.cpp index 7a45418..e549db1 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -293,13 +293,13 @@ namespace blt::gp stack.copy_from(values, for_bytes, after_bytes); } - void tree_t::swap_subtrees(const subtree_point_t our_subtree, tree_t& other_tree, const subtree_point_t other_subtree) + void tree_t::swap_subtrees(const child_t 
our_subtree, tree_t& other_tree, const child_t other_subtree) { - const auto c1_subtree_begin_itr = operations.begin() + our_subtree.pos; - const auto c1_subtree_end_itr = operations.begin() + find_endpoint(our_subtree.pos); + const auto c1_subtree_begin_itr = operations.begin() + our_subtree.start; + const auto c1_subtree_end_itr = operations.begin() + our_subtree.end; - const auto c2_subtree_begin_itr = other_tree.operations.begin() + other_subtree.pos; - const auto c2_subtree_end_itr = other_tree.operations.begin() + other_tree.find_endpoint(other_subtree.pos); + const auto c2_subtree_begin_itr = other_tree.operations.begin() + other_subtree.start; + const auto c2_subtree_end_itr = other_tree.operations.begin() + other_subtree.end; thread_local tracked_vector c1_subtree_operators; thread_local tracked_vector c2_subtree_operators; @@ -349,8 +349,8 @@ namespace blt::gp const auto copy_ptr_c1 = get_thread_pointer_for_size(c1_total); const auto copy_ptr_c2 = get_thread_pointer_for_size(c2_total); - values.reserve(values.bytes_in_head() - c1_subtree_bytes + c2_subtree_bytes); - other_tree.values.reserve(other_tree.values.bytes_in_head() - c2_subtree_bytes + c1_subtree_bytes); + values.reserve(values.stored() - c1_subtree_bytes + c2_subtree_bytes); + other_tree.values.reserve(other_tree.values.stored() - c2_subtree_bytes + c1_subtree_bytes); values.copy_to(copy_ptr_c1, c1_total); values.pop_bytes(c1_total); @@ -376,6 +376,12 @@ namespace blt::gp other_tree.operations.insert(insert_point_c2, c1_subtree_operators.begin(), c1_subtree_operators.end()); } + void tree_t::swap_subtrees(const subtree_point_t our_subtree, tree_t& other_tree, const subtree_point_t other_subtree) + { + swap_subtrees(child_t{our_subtree.pos, find_endpoint(our_subtree.pos)}, other_tree, + child_t{other_subtree.pos, other_tree.find_endpoint(other_subtree.pos)}); + } + void tree_t::replace_subtree(const subtree_point_t point, const ptrdiff_t extent, tree_t& other_tree) { const auto point_begin_itr = 
operations.begin() + point.pos;
@@ -535,7 +541,7 @@ namespace blt::gp
     bool tree_t::check(void* context) const
     {
         size_t bytes_expected = 0;
-        const auto bytes_size = values.size().total_used_bytes;
+        const auto bytes_size = values.stored();
 
         for (const auto& op : operations)
         {
@@ -545,7 +551,7 @@ namespace blt::gp
 
         if (bytes_expected != bytes_size)
         {
-            BLT_ERROR("Stack state: {}", values.size());
+            BLT_ERROR("Stack state: Stored: {}; Capacity: {}; Remainder: {}", values.stored(), values.capacity(), values.remainder());
             BLT_ERROR("Child tree bytes {} vs expected {}, difference: {}", bytes_size, bytes_expected,
                       static_cast(bytes_expected) - static_cast(bytes_size));
             BLT_ERROR("Amount of bytes in stack doesn't match the number of bytes expected for the operations");
@@ -580,7 +586,7 @@ namespace blt::gp
             total_produced += m_program->get_typesystem().get_type(info.return_type).size();
         }
 
-        const auto v1 = results.values.bytes_in_head();
+        const auto v1 = static_cast(results.values.stored());
         const auto v2 = static_cast(operations.front().type_size());
 
         // ephemeral don't need to be dropped as there are no copies which matter when checking the tree
@@ -668,6 +674,105 @@ namespace blt::gp
         return {point, m_program->get_operator_info(operations[point].id()).return_type};
     }
 
+    // Number of bytes to_byte_array() writes: two size_t length fields (operator count and value byte count),
+    // one operator_id per operation, and the stored value-stack bytes.
+    size_t tree_t::required_size() const
+    {
+        // the per-operator size must be sizeof(operator_id), which is what to_byte_array() and to_file() emit
+        return 2 * sizeof(size_t) + operations.size() * sizeof(operator_id) + values.stored();
+    }
+
+    // Serializes the tree into `out` as: operator count (size_t), each operator id (operator_id),
+    // value byte count (size_t), then the raw value-stack bytes. `out` is not bounds checked, so it
+    // must be able to hold required_size() bytes.
+    void tree_t::to_byte_array(std::byte* out) const
+    {
+        const auto op_size = operations.size();
+        std::memcpy(out, &op_size, sizeof(size_t));
+        out += sizeof(size_t);
+        for (const auto& op : operations)
+        {
+            constexpr auto size_of_op = sizeof(operator_id);
+            auto id = op.id();
+            std::memcpy(out, &id, size_of_op);
+            out += size_of_op;
+        }
+        const auto val_size = values.stored();
+        std::memcpy(out, &val_size, sizeof(size_t));
+        out += sizeof(size_t);
+        std::memcpy(out, values.data(), val_size);
+    }
+
+    // Writes the same layout as to_byte_array() to `file`, asserting that every write is complete.
+    // NOTE(review): the writes are evaluated inside BLT_ASSERT - confirm the macro is never compiled out.
+    void tree_t::to_file(fs::writer_t& file) const
+    {
+        const auto op_size = operations.size();
+        BLT_ASSERT(file.write(&op_size, sizeof(size_t)) == sizeof(size_t));
+        for (const auto& op : operations)
+        {
+            auto id = op.id();
+            // checked like every other write: a short write of an id would corrupt the rest of the file
+            BLT_ASSERT(file.write(&id, sizeof(operator_id)) == sizeof(operator_id));
+        }
+        const auto val_size = values.stored();
+        BLT_ASSERT(file.write(&val_size, sizeof(size_t)) == sizeof(size_t));
+        BLT_ASSERT(file.write(values.data(), val_size) == static_cast(val_size));
+    }
+
+    // Reads a tree in the to_byte_array() layout from `in`. Each operator's type size, ephemeral flag and
+    // flags are looked up from m_program by id. Decoded operators are appended to `operations`, which is
+    // not cleared first, and `in` is trusted: neither its length nor the ids it contains are validated.
+    void tree_t::from_byte_array(const std::byte* in)
+    {
+        size_t ops_to_read;
+        std::memcpy(&ops_to_read, in, sizeof(size_t));
+        in += sizeof(size_t);
+        operations.reserve(ops_to_read);
+        for (size_t i = 0; i < ops_to_read; i++)
+        {
+            operator_id id;
+            std::memcpy(&id, in, sizeof(operator_id));
+            in += sizeof(operator_id);
+            operations.emplace_back(
+                m_program->get_typesystem().get_type(m_program->get_operator_info(id).return_type).size(),
+                id,
+                m_program->is_operator_ephemeral(id),
+                m_program->get_operator_flags(id)
+            );
+        }
+        size_t val_size;
+        std::memcpy(&val_size, in, sizeof(size_t));
+        in += sizeof(size_t);
+        // TODO replace instances of u8 that are used to alias types with the proper std::byte
+        values.copy_from(reinterpret_cast(in), val_size);
+    }
+
+    // File counterpart of from_byte_array(): reads the operator list, then resizes the value stack to the
+    // stored byte count and reads the bytes directly into it. Every read is checked with BLT_ASSERT.
+    // Decoded operators are appended to `operations`, which is not cleared first.
+    void tree_t::from_file(fs::reader_t& file)
+    {
+        size_t ops_to_read;
+        BLT_ASSERT(file.read(&ops_to_read, sizeof(size_t)) == sizeof(size_t));
+        operations.reserve(ops_to_read);
+        for (size_t i = 0; i < ops_to_read; i++)
+        {
+            operator_id id;
+            BLT_ASSERT(file.read(&id, sizeof(operator_id)) == sizeof(operator_id));
+            operations.emplace_back(
+                m_program->get_typesystem().get_type(m_program->get_operator_info(id).return_type).size(),
+                id,
+                m_program->is_operator_ephemeral(id),
+                m_program->get_operator_flags(id)
+            );
+        }
+        size_t bytes_in_head;
+        BLT_ASSERT(file.read(&bytes_in_head, sizeof(size_t)) == sizeof(size_t));
+        values.resize(bytes_in_head);
+        BLT_ASSERT(file.read(values.data(), bytes_in_head) == static_cast(bytes_in_head));
+    }
+
     void
tree_t::modify_operator(const size_t point, operator_id new_id, std::optional return_type)
     {
         if (!return_type)
@@ -702,4 +793,24 @@ namespace blt::gp
             handle_operator_inserted(operations[point]);
         }
     }
+
+    // Two trees compare equal when they hold the same number of operators, the same number of stored value
+    // bytes, and equal operators in the same order. The value bytes themselves are not compared.
+    bool operator==(const tree_t& a, const tree_t& b)
+    {
+        const bool same_sizes = a.operations.size() == b.operations.size() && a.values.stored() == b.values.stored();
+        return same_sizes && std::equal(a.operations.begin(), a.operations.end(), b.operations.begin());
+    }
+
+    // Operator containers are equal when they carry the same operator id.
+    bool operator==(const op_container_t& a, const op_container_t& b)
+    {
+        return a.id() == b.id();
+    }
+
+    // Individuals are equal when their trees are equal.
+    bool operator==(const individual_t& a, const individual_t& b)
+    {
+        return a.tree == b.tree;
+    }
 }
diff --git a/tests/serialization_test.cpp b/tests/serialization_test.cpp
new file mode 100644
index 0000000..4ff77e9
--- /dev/null
+++ b/tests/serialization_test.cpp
@@ -0,0 +1,179 @@
+/*
+ *
+ * Copyright (C) 2025 Brett Terpstra
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+#include
+
+#include "../examples/symbolic_regression.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+using namespace blt::gp;
+
+struct context
+{
+    float x, y;
+};
+
+prog_config_t config = prog_config_t()
+    .set_initial_min_tree_size(2)
+    .set_initial_max_tree_size(6)
+    .set_elite_count(2)
+    .set_crossover_chance(0.8)
+    .set_mutation_chance(0.1)
+    .set_reproduction_chance(0.1)
+    .set_max_generations(50)
+    .set_pop_size(500)
+    .set_thread_count(1);
+
+
+example::symbolic_regression_t regression{691ul, config};
+
+operation_t addf{[](const float a, const float b) { return a + b; }, "addf"};
+operation_t subf([](const float a, const float b) { return a - b; }, "subf");
+operation_t mulf([](const float a, const float b) { return a * b; }, "mulf");
+operation_t pro_divf([](const float a, const float b) { return b == 0.0f ? 0.0f : a / b; }, "divf");
+operation_t op_sinf([](const float a) { return std::sin(a); }, "sinf");
+operation_t op_cosf([](const float a) { return std::cos(a); }, "cosf");
+operation_t op_expf([](const float a) { return std::exp(a); }, "expf");
+operation_t op_logf([](const float a) { return a <= 0.0f ? 0.0f : std::log(a); }, "logf");
+
+auto litf = operation_t([]()
+{
+    return regression.get_program().get_random().get_float(-1.0f, 1.0f);
+}, "litf").set_ephemeral();
+
+operation_t op_xf([](const context& context)
+{
+    return context.x;
+}, "xf");
+
+// Fitness callback handed to all three programs: sums the absolute error of the tree over every training
+// case (each case contributes at most value_cutoff) and counts cases with an error of at most 0.01 as hits.
+// Returns true when every training case is a hit.
+bool fitness_function(const tree_t& current_tree, fitness_t& fitness, size_t)
+{
+    constexpr static double value_cutoff = 1.e15;
+    for (auto& fitness_case : regression.get_training_cases())
+    {
+        BLT_GP_UPDATE_CONTEXT(fitness_case);
+        auto evaluation = current_tree.get_evaluation_ref(fitness_case);
+        const auto abs_error = std::abs(fitness_case.y - evaluation.get());
+        // written as !(error < cutoff) so that a NaN error is also charged the cutoff
+        if (!(abs_error < value_cutoff))
+        {
+            fitness.raw_fitness += value_cutoff;
+            continue;
+        }
+        fitness.raw_fitness += abs_error;
+        if (abs_error <= 0.01)
+            fitness.hits++;
+    }
+    fitness.standardized_fitness = fitness.raw_fitness;
+    fitness.adjusted_fitness = (1.0 / (1.0 + fitness.standardized_fitness));
+    return static_cast(fitness.hits) == regression.get_training_cases().size();
+}
+
+int main()
+{
+    operator_builder builder{};
+    const auto& operators = builder.build(addf, subf, mulf, pro_divf, op_sinf, op_cosf, op_expf, op_logf, litf, op_xf);
+    regression.get_program().set_operations(operators);
+
+    auto& program = regression.get_program();
+    static auto sel = select_tournament_t{};
+
+    // second program with the identical operator set; everything `program` saves is loaded back into it
+    gp_program test_program{691};
+    test_program.set_operations(operators);
+    test_program.setup_generational_evaluation(fitness_function, sel, sel, sel, false);
+
+    // a program built from fewer operators (no divf, expf or logf): similar to, but incompatible with, the others
+    operator_builder builder2{};
+    gp_program bad_program{691};
+    bad_program.set_operations(builder2.build(addf, subf, mulf, op_sinf, op_cosf, litf, op_xf));
+    bad_program.setup_generational_evaluation(fitness_function, sel, sel, sel, false);
+
+    program.generate_initial_population(program.get_typesystem().get_type().id());
+    program.setup_generational_evaluation(fitness_function, sel, sel, sel);
+    while (!program.should_terminate())
+    {
+        BLT_TRACE("---------------\\{Begin Generation {}}---------------", program.get_current_generation());
+        BLT_TRACE("Creating next generation");
+        program.create_next_generation();
+        BLT_TRACE("Move to next generation");
+        program.next_generation();
+        BLT_TRACE("Evaluate Fitness");
+        program.evaluate_fitness();
+        // round-trip the freshly evaluated generation through a file on disk
+        {
+            std::ofstream out_stream{"serialization_test.data", std::ios::binary | std::ios::trunc};
+            blt::fs::fstream_writer_t writer{out_stream};
+            program.save_generation(writer);
+        }
+        {
+            std::ifstream in_stream{"serialization_test.data", std::ios::binary};
+            blt::fs::fstream_reader_t reader{in_stream};
+            test_program.load_generation(reader);
+        }
+        // every loaded individual must hold a tree equal to the one that was saved
+        for (const auto& [saved, loaded] : blt::zip(program.get_current_pop(), test_program.get_current_pop()))
+        {
+            if (saved.tree != loaded.tree)
+            {
+                BLT_ERROR("Serializer Failed to correctly serialize tree to disk, trees are not equal!");
+                std::exit(1);
+            }
+        }
+    }
+    // round-trip the complete program state and compare the recorded statistics histories
+    {
+        std::ofstream out_stream{"serialization_test2.data", std::ios::binary | std::ios::trunc};
+        blt::fs::fstream_writer_t writer{out_stream};
+        program.save_state(writer);
+    }
+    {
+        std::ifstream in_stream{"serialization_test2.data", std::ios::binary};
+        blt::fs::fstream_reader_t reader{in_stream};
+        if (auto error = test_program.load_state(reader))
+        {
+            BLT_ERROR("Error: {}", error->call_member(&errors::serialization::error_to_string_t::to_string));
+            BLT_ABORT("Expected program to succeeded without returning an error state!");
+        }
+
+        for (const auto [saved, loaded] : blt::zip(program.get_stats_histories(), test_program.get_stats_histories()))
+        {
+            if (saved != loaded)
+            {
+                BLT_ERROR("Serializer Failed to correctly serialize histories to disk, histories are not equal!");
+                std::exit(1);
+            }
+        }
+    }
+    // the incompatible program is expected to report an error for the same state file
+    {
+        std::ifstream in_stream{"serialization_test2.data", std::ios::binary};
+        blt::fs::fstream_reader_t reader{in_stream};
+        if (!bad_program.load_state(reader))
+        {
+            BLT_ABORT("Expected program to throw an exception when parsing state data into an incompatible program!");
+        }
+    }
+}