#pragma once /* * Copyright (C) 2024 Brett Terpstra * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifndef BLT_GP_PROGRAM_H #define BLT_GP_PROGRAM_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "blt/format/format.h" namespace blt::gp { struct argc_t { blt::u32 argc = 0; blt::u32 argc_context = 0; [[nodiscard]] bool is_terminal() const { return argc == 0; } }; struct operator_info_t { // types of the arguments tracked_vector argument_types; // return type of this operator type_id return_type; // number of arguments for this operator argc_t argc; // per operator function callable (slow) detail::operator_func_t func; }; struct operator_metadata_t { blt::size_t arg_size_bytes = 0; blt::size_t return_size_bytes = 0; argc_t argc{}; }; struct program_operator_storage_t { // indexed from return TYPE ID, returns index of operator blt::expanding_buffer> terminals; blt::expanding_buffer> non_terminals; blt::expanding_buffer>> operators_ordered_terminals; // indexed from OPERATOR ID (operator number) blt::hashset_t ephemeral_leaf_operators; tracked_vector operators; tracked_vector operator_metadata; tracked_vector print_funcs; tracked_vector destroy_funcs; tracked_vector> names; detail::eval_func_t eval_func; type_provider system; }; template class operator_builder { friend class gp_program; friend class blt::gp::detail::operator_storage_test; public: explicit operator_builder() = default; template program_operator_storage_t& build(Operators& ... operators) { blt::size_t largest_args = 0; blt::size_t largest_returns = 0; blt::u32 largest_argc = 0; operator_metadata_t meta; ((meta = add_operator(operators), largest_argc = std::max(meta.argc.argc, largest_argc), largest_args = std::max(meta.arg_size_bytes, largest_args), largest_returns = std::max(meta.return_size_bytes, largest_returns)), ...); // largest = largest * largest_argc; blt::size_t largest = largest_args * largest_argc * largest_returns * largest_argc; storage.eval_func = [&operators..., largest](const tree_t& tree, void* context) -> evaluation_context& { const auto& ops = tree.get_operations(); const auto& vals = tree.get_values(); static thread_local evaluation_context results{}; results.values.reset(); results.values.reserve(largest); blt::size_t total_so_far = 0; blt::size_t op_pos = 0; for (const auto& operation : blt::reverse_iterate(ops.begin(), ops.end())) { op_pos++; if (operation.is_value) { total_so_far += stack_allocator::aligned_size(operation.type_size); results.values.copy_from(vals.from(total_so_far), stack_allocator::aligned_size(operation.type_size)); continue; } call_jmp_table(operation.id, context, results.values, results.values, operators...); } return results; }; blt::hashset_t has_terminals; for (const auto& v : blt::enumerate(storage.terminals)) { if (!v.second.empty()) has_terminals.insert(v.first); } for (const auto& op_r : blt::enumerate(storage.non_terminals)) { if (op_r.second.empty()) continue; auto return_type = op_r.first; tracked_vector> ordered_terminals; for (const auto& op : op_r.second) { // count number of terminals blt::size_t terminals = 0; for (const auto& type : storage.operators[op].argument_types) { if (has_terminals.contains(type)) terminals++; } ordered_terminals.emplace_back(op, terminals); } bool found_terminal_inputs = false; bool matches_argc = false; for (const auto& terms : ordered_terminals) { if (terms.second == storage.operators[terms.first].argc.argc) matches_argc = true; if (terms.second != 0) found_terminal_inputs = true; if (matches_argc && found_terminal_inputs) break; } if (!found_terminal_inputs) BLT_ABORT(("Failed to find function with terminal arguments for return type " + std::to_string(return_type)).c_str()); if (!matches_argc) { BLT_ABORT(("Failed to find a function which purely translates types " "(that is all input types are terminals) for return type " + std::to_string(return_type)).c_str()); } std::sort(ordered_terminals.begin(), ordered_terminals.end(), [](const auto& a, const auto& b) { return a.second > b.second; }); auto first_size = *ordered_terminals.begin(); auto iter = ordered_terminals.begin(); while (++iter != ordered_terminals.end() && iter->second == first_size.second) {} ordered_terminals.erase(iter, ordered_terminals.end()); storage.operators_ordered_terminals[return_type] = ordered_terminals; } return storage; } program_operator_storage_t&& grab() { return std::move(storage); } private: template auto add_operator(operation_t& op) { // check for types we can register (storage.system.register_type(), ...); storage.system.register_type(); auto return_type_id = storage.system.get_type().id(); auto operator_id = blt::gp::operator_id(storage.operators.size()); op.id = operator_id; operator_info_t info; if constexpr (sizeof...(Args) > 0) { (add_non_context_argument>(info.argument_types), ...); } info.argc.argc_context = info.argc.argc = sizeof...(Args); info.return_type = return_type_id; info.func = op.template make_callable(); ((std::is_same_v, Context> ? info.argc.argc -= 1 : (blt::size_t) nullptr), ...); auto& operator_list = info.argc.argc == 0 ? storage.terminals : storage.non_terminals; operator_list[return_type_id].push_back(operator_id); BLT_ASSERT(info.argc.argc_context - info.argc.argc <= 1 && "Cannot pass multiple context as arguments!"); storage.operators.push_back(info); operator_metadata_t meta; if constexpr (sizeof...(Args) != 0) { meta.arg_size_bytes = (stack_allocator::aligned_size(sizeof(Args)) + ...); } meta.return_size_bytes = sizeof(Return); meta.argc = info.argc; storage.operator_metadata.push_back(meta); storage.print_funcs.push_back([&op](std::ostream& out, stack_allocator& stack) { if constexpr (blt::meta::is_streamable_v) { out << stack.from(0); (void) (op); // remove warning } else { out << "[Printing Value on '" << (op.get_name() ? *op.get_name() : "") << "' Not Supported!]"; } }); storage.destroy_funcs.push_back([](detail::destroy_t type, stack_allocator& alloc) { switch (type) { case detail::destroy_t::ARGS: alloc.call_destructors(); break; case detail::destroy_t::RETURN: if constexpr (detail::has_func_drop_v>) { alloc.from>(0).drop(); } break; } }); storage.names.push_back(op.get_name()); if (op.is_ephemeral()) storage.ephemeral_leaf_operators.insert(operator_id); return meta; } template void add_non_context_argument(decltype(operator_info_t::argument_types)& types) { if constexpr (!std::is_same_v>) { types.push_back(storage.system.get_type().id()); } } template static inline void execute(void* context, stack_allocator& write_stack, stack_allocator& read_stack, Operator& operation) { if constexpr (std::is_same_v, Context>) { write_stack.push(operation(context, read_stack)); } else { write_stack.push(operation(read_stack)); } } template static inline bool call(blt::size_t op, void* context, stack_allocator& write_stack, stack_allocator& read_stack, Operator& operation) { if (id == op) { execute(context, write_stack, read_stack, operation); return false; } return true; } template static inline void call_jmp_table_internal(size_t op, void* context, stack_allocator& write_stack, stack_allocator& read_stack, std::integer_sequence, Operators& ... operators) { if (op >= sizeof...(operator_ids)) { BLT_UNREACHABLE; } (call(op, context, write_stack, read_stack, operators) && ...); } template static inline void call_jmp_table(size_t op, void* context, stack_allocator& write_stack, stack_allocator& read_stack, Operators& ... operators) { call_jmp_table_internal(op, context, write_stack, read_stack, std::index_sequence_for(), operators...); } program_operator_storage_t storage; }; class gp_program { public: /** * Note about context size: This is required as context is passed to every operator in the GP tree, this context will be provided by your * call to one of the evaluator functions. This was the nicest way to provide this as C++ lacks reflection * * @param engine random engine to use throughout the program. * @param context_size number of arguments which are always present as "context" to the GP system / operators */ explicit gp_program(blt::u64 seed): seed_func([seed] { return seed; }) { create_threads(); } explicit gp_program(blt::u64 seed, prog_config_t config): seed_func([seed] { return seed; }), config(config) { create_threads(); } explicit gp_program(std::function seed_func): seed_func(std::move(seed_func)) { create_threads(); } explicit gp_program(std::function seed_func, prog_config_t config): seed_func(std::move(seed_func)), config(config) { create_threads(); } ~gp_program() { thread_helper.lifetime_over = true; thread_helper.barrier.notify_all(); thread_helper.thread_function_condition.notify_all(); for (auto& thread : thread_helper.threads) { if (thread->joinable()) thread->join(); } } void create_next_generation() { #ifdef BLT_TRACK_ALLOCATIONS auto gen_alloc = blt::gp::tracker.start_measurement(); #endif // should already be empty thread_helper.next_gen_left.store(config.population_size, std::memory_order_release); (*thread_execution_service)(0); #ifdef BLT_TRACK_ALLOCATIONS blt::gp::tracker.stop_measurement(gen_alloc); gen_alloc.pretty_print("Generation"); #endif } void next_generation() { std::swap(current_pop, next_pop); current_generation++; } void evaluate_fitness() { #ifdef BLT_TRACK_ALLOCATIONS auto fitness_alloc = blt::gp::tracker.start_measurement(); #endif evaluate_fitness_internal(); #ifdef BLT_TRACK_ALLOCATIONS blt::gp::tracker.stop_measurement(fitness_alloc); fitness_alloc.pretty_print("Fitness"); evaluation_calls.call(); evaluation_calls.set_value(std::max(evaluation_calls.get_value(), fitness_alloc.getAllocatedByteDifference())); if (fitness_alloc.getAllocatedByteDifference() > 0) { evaluation_allocations.call(fitness_alloc.getAllocatedByteDifference()); } #endif } void reset_program(type_id root_type, bool eval_fitness_now = true) { current_generation = 0; current_pop = config.pop_initializer.get().generate( {*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size}); next_pop = population_t(current_pop); BLT_ASSERT_MSG(current_pop.get_individuals().size() == config.population_size, ("cur pop size: " + std::to_string(current_pop.get_individuals().size())).c_str()); BLT_ASSERT_MSG(next_pop.get_individuals().size() == config.population_size, ("next pop size: " + std::to_string(next_pop.get_individuals().size())).c_str()); if (eval_fitness_now) evaluate_fitness_internal(); } void kill() { thread_helper.lifetime_over = true; } /** * takes in a reference to a function for the fitness evaluation function (must return a value convertable to double) * The lambda must accept a tree for evaluation, and an index (current tree) * * tree_t& current_tree, blt::size_t index_of_tree * * Container must be concurrently accessible from multiple threads using operator[] * * NOTE: 0 is considered the best, in terms of standardized fitness */ template)> void generate_population(type_id root_type, FitnessFunc& fitness_function, Crossover& crossover_selection, Mutation& mutation_selection, Reproduction& reproduction_selection, CreationFunc& func = default_next_pop_creator, bool eval_fitness_now = true) { using LambdaReturn = typename decltype(blt::meta::lambda_helper(fitness_function))::Return; current_pop = config.pop_initializer.get().generate( {*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size}); next_pop = population_t(current_pop); BLT_ASSERT_MSG(current_pop.get_individuals().size() == config.population_size, ("cur pop size: " + std::to_string(current_pop.get_individuals().size())).c_str()); BLT_ASSERT_MSG(next_pop.get_individuals().size() == config.population_size, ("next pop size: " + std::to_string(next_pop.get_individuals().size())).c_str()); if (config.threads == 1) { BLT_INFO("Starting with single thread variant!"); thread_execution_service = std::unique_ptr>(new std::function( [this, &fitness_function, &crossover_selection, &mutation_selection, &reproduction_selection, &func](blt::size_t) { if (thread_helper.evaluation_left > 0) { current_stats.normalized_fitness.clear(); double sum_of_prob = 0; for (const auto& [index, ind] : blt::enumerate(current_pop.get_individuals())) { if constexpr (std::is_same_v || std::is_convertible_v) { auto result = fitness_function(ind.tree, ind.fitness, index); if (result) fitness_should_exit = true; } else fitness_function(ind.tree, ind.fitness, index); if (ind.fitness.adjusted_fitness > current_stats.best_fitness) current_stats.best_fitness = ind.fitness.adjusted_fitness; if (ind.fitness.adjusted_fitness < current_stats.worst_fitness) current_stats.worst_fitness = ind.fitness.adjusted_fitness; current_stats.overall_fitness = current_stats.overall_fitness + ind.fitness.adjusted_fitness; } for (auto& ind : current_pop) { auto prob = (ind.fitness.adjusted_fitness / current_stats.overall_fitness); current_stats.normalized_fitness.push_back(sum_of_prob + prob); sum_of_prob += prob; } thread_helper.evaluation_left = 0; } if (thread_helper.next_gen_left > 0) { auto args = get_selector_args(); crossover_selection.pre_process(*this, current_pop); mutation_selection.pre_process(*this, current_pop); reproduction_selection.pre_process(*this, current_pop); blt::size_t start = perform_elitism(args, next_pop); while (start < config.population_size) { tree_t& c1 = next_pop.get_individuals()[start].tree; tree_t* c2 = nullptr; if (start + 1 < config.population_size) c2 = &next_pop.get_individuals()[start + 1].tree; start += func(args, crossover_selection, mutation_selection, reproduction_selection, c1, c2); } thread_helper.next_gen_left = 0; } })); } else { BLT_INFO("Starting thread execution service!"); std::scoped_lock lock(thread_helper.thread_function_control); thread_execution_service = std::unique_ptr>(new std::function( [this, &fitness_function, &crossover_selection, &mutation_selection, &reproduction_selection, &func](blt::size_t id) { thread_helper.barrier.wait(); if (thread_helper.evaluation_left > 0) { while (thread_helper.evaluation_left > 0) { blt::size_t size = 0; blt::size_t begin = 0; blt::size_t end = thread_helper.evaluation_left.load(std::memory_order_relaxed); do { size = std::min(end, config.evaluation_size); begin = end - size; } while (!thread_helper.evaluation_left.compare_exchange_weak(end, end - size, std::memory_order::memory_order_relaxed, std::memory_order::memory_order_relaxed)); for (blt::size_t i = begin; i < end; i++) { auto& ind = current_pop.get_individuals()[i]; if constexpr (std::is_same_v || std::is_convertible_v) { auto result = fitness_function(ind.tree, ind.fitness, i); if (result) fitness_should_exit = true; } else { fitness_function(ind.tree, ind.fitness, i); } auto old_best = current_stats.best_fitness.load(std::memory_order_relaxed); while (ind.fitness.adjusted_fitness > old_best && !current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness, std::memory_order_relaxed, std::memory_order_relaxed)); auto old_worst = current_stats.worst_fitness.load(std::memory_order_relaxed); while (ind.fitness.adjusted_fitness < old_worst && !current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness, std::memory_order_relaxed, std::memory_order_relaxed)); auto old_overall = current_stats.overall_fitness.load(std::memory_order_relaxed); while (!current_stats.overall_fitness.compare_exchange_weak(old_overall, ind.fitness.adjusted_fitness + old_overall, std::memory_order_relaxed, std::memory_order_relaxed)); } } } if (thread_helper.next_gen_left > 0) { auto args = get_selector_args(); if (id == 0) { current_stats.normalized_fitness.clear(); double sum_of_prob = 0; for (auto& ind : current_pop) { auto prob = (ind.fitness.adjusted_fitness / current_stats.overall_fitness); current_stats.normalized_fitness.push_back(sum_of_prob + prob); sum_of_prob += prob; } crossover_selection.pre_process(*this, current_pop); if (&crossover_selection != &mutation_selection) mutation_selection.pre_process(*this, current_pop); if (&crossover_selection != &reproduction_selection) reproduction_selection.pre_process(*this, current_pop); auto elite_amount = perform_elitism(args, next_pop); thread_helper.next_gen_left -= elite_amount; } thread_helper.barrier.wait(); while (thread_helper.next_gen_left > 0) { blt::size_t size = 0; blt::size_t begin = 0; blt::size_t end = thread_helper.next_gen_left.load(std::memory_order_relaxed); do { size = std::min(end, config.evaluation_size); begin = end - size; } while (!thread_helper.next_gen_left.compare_exchange_weak(end, end - size, std::memory_order::memory_order_relaxed, std::memory_order::memory_order_relaxed)); while (begin != end) { auto index = config.elites + begin; tree_t& c1 = next_pop.get_individuals()[index].tree; tree_t* c2 = nullptr; if (begin + 1 < end) c2 = &next_pop.get_individuals()[index + 1].tree; begin += func(args, crossover_selection, mutation_selection, reproduction_selection, c1, c2); } } } thread_helper.barrier.wait(); })); thread_helper.thread_function_condition.notify_all(); } if (eval_fitness_now) evaluate_fitness_internal(); } [[nodiscard]] bool should_terminate() const { return current_generation >= config.max_generations || fitness_should_exit; } [[nodiscard]] bool should_thread_terminate() const { return thread_helper.lifetime_over; } inline operator_id select_terminal(type_id id) { // we wanted a terminal, but could not find one, so we will select from a function that has a terminal if (storage.terminals[id].empty()) return select_non_terminal_too_deep(id); return get_random().select(storage.terminals[id]); } inline operator_id select_non_terminal(type_id id) { // non-terminal doesn't exist, return a terminal. This is useful for types that are defined only to have a random value, nothing more. // was considering an std::optional<> but that would complicate the generator code considerably. I'll mark this as a TODO for v2 if (storage.non_terminals[id].empty()) return select_terminal(id); return get_random().select(storage.non_terminals[id]); } inline operator_id select_non_terminal_too_deep(type_id id) { // this should probably be an error. if (storage.operators_ordered_terminals[id].empty()) BLT_ABORT("An impossible state has been reached. Please consult the manual. Error 43"); return get_random().select(storage.operators_ordered_terminals[id]).first; } inline auto& get_current_pop() { return current_pop; } [[nodiscard]] random_t& get_random() const; [[nodiscard]] inline type_provider& get_typesystem() { return storage.system; } [[nodiscard]] inline operator_info_t& get_operator_info(operator_id id) { return storage.operators[id]; } [[nodiscard]] inline detail::print_func_t& get_print_func(operator_id id) { return storage.print_funcs[id]; } [[nodiscard]] inline detail::destroy_func_t& get_destroy_func(operator_id id) { return storage.destroy_funcs[id]; } [[nodiscard]] inline std::optional get_name(operator_id id) { return storage.names[id]; } [[nodiscard]] inline tracked_vector& get_type_terminals(type_id id) { return storage.terminals[id]; } [[nodiscard]] inline tracked_vector& get_type_non_terminals(type_id id) { return storage.non_terminals[id]; } [[nodiscard]] inline detail::eval_func_t& get_eval_func() { return storage.eval_func; } [[nodiscard]] inline auto get_current_generation() const { return current_generation.load(); } [[nodiscard]] inline const auto& get_population_stats() const { return current_stats; } [[nodiscard]] inline bool is_operator_ephemeral(operator_id id) { return storage.ephemeral_leaf_operators.contains(static_cast(id)); } inline void set_operations(program_operator_storage_t op) { storage = std::move(op); } template std::array get_best_indexes() { std::array arr; tracked_vector> values; values.reserve(current_pop.get_individuals().size()); for (const auto& ind : blt::enumerate(current_pop.get_individuals())) values.emplace_back(ind.first, ind.second.fitness.adjusted_fitness); std::sort(values.begin(), values.end(), [](const auto& a, const auto& b) { return a.second > b.second; }); for (blt::size_t i = 0; i < std::min(size, config.population_size); i++) arr[i] = values[i].first; for (blt::size_t i = std::min(size, config.population_size); i < size; i++) arr[i] = 0; return arr; } template auto get_best_trees() { return convert_array, size>>(get_best_indexes(), [this](auto&& arr, blt::size_t index) -> tree_t& { return current_pop.get_individuals()[arr[index]].tree; }, std::make_integer_sequence()); } template auto get_best_individuals() { return convert_array, size>>(get_best_indexes(), [this](auto&& arr, blt::size_t index) -> individual_t& { return current_pop.get_individuals()[arr[index]]; }, std::make_integer_sequence()); } private: inline selector_args get_selector_args() { return {*this, current_pop, current_stats, config, get_random()}; } template inline Return convert_array(std::array&& arr, Accessor&& accessor, std::integer_sequence) { return Return{accessor(arr, indexes)...}; } void create_threads(); void evaluate_fitness_internal() { statistic_history.push_back(current_stats); current_stats.clear(); thread_helper.evaluation_left.store(config.population_size, std::memory_order_release); (*thread_execution_service)(0); current_stats.average_fitness = current_stats.overall_fitness / static_cast(config.population_size); } private: program_operator_storage_t storage; std::function seed_func; prog_config_t config{}; population_t current_pop; population_t next_pop; std::atomic_uint64_t current_generation = 0; std::atomic_bool fitness_should_exit = false; population_stats current_stats{}; tracked_vector statistic_history; struct concurrency_storage { std::vector> threads; std::mutex thread_function_control{}; std::condition_variable thread_function_condition{}; std::atomic_uint64_t evaluation_left = 0; std::atomic_uint64_t next_gen_left = 0; std::atomic_bool lifetime_over = false; blt::barrier barrier; explicit concurrency_storage(blt::size_t threads): barrier(threads, lifetime_over) {} } thread_helper{config.threads == 0 ? std::thread::hardware_concurrency() : config.threads}; std::unique_ptr> thread_execution_service = nullptr; }; } #endif //BLT_GP_PROGRAM_H