diff --git a/CMakeLists.txt b/CMakeLists.txt index c0b4d2f..cc8684f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(blt-gp VERSION 0.0.58) +project(blt-gp VERSION 0.0.59) include(CTest) @@ -79,5 +79,6 @@ if (${BUILD_EXAMPLES}) blt_add_example(blt-gp5 examples/gp_test_5.cpp) blt_add_example(blt-gp6 examples/gp_test_6.cpp) blt_add_example(blt-gp7 examples/gp_test_7.cpp) + blt_add_example(blt-symbolic-regression examples/gp_symbolic_regression_example.cpp) endif () \ No newline at end of file diff --git a/examples/gp_symbolic_regression_example.cpp b/examples/gp_symbolic_regression_example.cpp new file mode 100644 index 0000000..8751b1b --- /dev/null +++ b/examples/gp_symbolic_regression_example.cpp @@ -0,0 +1,130 @@ +/* + * + * Copyright (C) 2024 Brett Terpstra + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include + +static constexpr long SEED = 41912; + +struct context +{ + float x, y; +}; + +std::array fitness_cases; + +blt::gp::prog_config_t config = blt::gp::prog_config_t() + .set_initial_min_tree_size(2) + .set_initial_max_tree_size(6) + .set_elite_count(0) + .set_max_generations(50) + .set_pop_size(500); + +blt::gp::type_provider type_system; +blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}, config); // NOLINT + +blt::gp::operation_t add([](float a, float b) { return a + b; }, "add"); +blt::gp::operation_t sub([](float a, float b) { return a - b; }, "sub"); +blt::gp::operation_t mul([](float a, float b) { return a * b; }, "mul"); +blt::gp::operation_t pro_div([](float a, float b) { return b == 0.0f ? 1.0f : a / b; }, "div"); +blt::gp::operation_t op_sin([](float a) { return std::sin(a); }, "sin"); +blt::gp::operation_t op_cos([](float a) { return std::cos(a); }, "cos"); +blt::gp::operation_t op_exp([](float a) { return std::exp(a); }, "exp"); +blt::gp::operation_t op_log([](float a) { return a == 0.0f ? 0.0f : std::log(a); }, "log"); + +blt::gp::operation_t lit([]() { + return program.get_random().get_float(-320.0f, 320.0f); +}, "lit"); +blt::gp::operation_t op_x([](const context& context) { + return context.x; +}, "x"); + +constexpr auto fitness_function = [](blt::gp::tree_t& current_tree, blt::gp::fitness_t& fitness, blt::size_t index) { + constexpr double value_cutoff = 1.e15; + for (auto& fitness_case : fitness_cases) + { + auto diff = std::abs(fitness_case.y - current_tree.get_evaluation_value(&fitness_case)); + if (diff < value_cutoff) + { + fitness.raw_fitness += diff; + if (diff < 0.01) + fitness.hits++; + } else + fitness.raw_fitness += value_cutoff; + } + fitness.standardized_fitness = fitness.raw_fitness; + fitness.adjusted_fitness = 1.0 / (1.0 + fitness.standardized_fitness); + //BLT_TRACE("fitness: %lf raw: %lf", fitness.adjusted_fitness, fitness.raw_fitness); +}; + +float example_function(float x) +{ + return x * x * x * x + x * x * x + x * x + x; +} + +int main() +{ + for (auto& fitness_case : fitness_cases) + { + constexpr float range = 10; + constexpr float half_range = range / 2.0; + auto x = program.get_random().get_float(-half_range, half_range); + auto y = example_function(x); + fitness_case = {x, y}; + } + + type_system.register_type(); + + blt::gp::operator_builder builder{type_system}; + builder.add_operator(add); + builder.add_operator(sub); + builder.add_operator(mul); + builder.add_operator(pro_div); + builder.add_operator(op_sin); + builder.add_operator(op_cos); + builder.add_operator(op_exp); + builder.add_operator(op_log); + + builder.add_operator(lit, true); + builder.add_operator(op_x); + + program.set_operations(builder.build()); + + program.generate_population(type_system.get_type().id(), fitness_function); + + while (!program.should_terminate()) + { + program.create_next_generation(blt::gp::select_fitness_proportionate_t{}, blt::gp::select_fitness_proportionate_t{}, blt::gp::select_fitness_proportionate_t{}); + program.next_generation(); + program.evaluate_fitness(); + } + + auto best = program.get_best_individuals<3>(); + + BLT_INFO("Best approximations:"); + for (auto& i_ref : best) + { + auto& i = i_ref.get(); + BLT_DEBUG("Fitness: %lf, stand: %lf, raw: %lf", i.fitness.adjusted_fitness, i.fitness.standardized_fitness, i.fitness.raw_fitness); + i.tree.print(program, std::cout); + std::cout << "\n"; + } + + return 0; +} \ No newline at end of file diff --git a/examples/gp_test_7.cpp b/examples/gp_test_7.cpp index c60a91b..89c462a 100644 --- a/examples/gp_test_7.cpp +++ b/examples/gp_test_7.cpp @@ -16,14 +16,12 @@ * along with this program. If not, see . */ #include -#include #include #include -#include static constexpr long SEED = 41912; -blt::gp::prog_config_t config = blt::gp::prog_config_t().set_elite_count(1); +blt::gp::prog_config_t config = blt::gp::prog_config_t().set_elite_count(2); blt::gp::type_provider type_system; blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}, config); // NOLINT @@ -72,6 +70,17 @@ void print_best() //BLT_TRACE(small); } +constexpr auto fitness_function = [](blt::gp::tree_t& current_tree, blt::gp::fitness_t& fitness, blt::size_t index) { + /*auto size = current_tree.get_values().size(); + BLT_DEBUG("(depth: %ld) (blocks: %ld) (size: t: %ld m: %ld u: %ld r: %ld) filled: %f%%", + current_tree.get_depth(program), size.blocks, size.total_size_bytes, size.total_no_meta_bytes, size.total_used_bytes, + size.total_remaining_bytes, static_cast(size.total_used_bytes) / static_cast(size.total_no_meta_bytes));*/ + result_container[index] = current_tree.get_evaluation_value(nullptr); + fitness.raw_fitness = result_container[index] / 1000000000.0; + fitness.standardized_fitness = fitness.raw_fitness; + fitness.adjusted_fitness = 1.0 - (1.0 / (1.0 + fitness.raw_fitness)); +}; + /** * This is a test using multiple types with blt::gp */ @@ -100,29 +109,16 @@ int main() program.set_operations(builder.build()); - program.generate_population(type_system.get_type().id()); + program.generate_population(type_system.get_type().id(), fitness_function); while (!program.should_terminate()) { - program.evaluate_fitness([](blt::gp::tree_t& current_tree, decltype(result_container)& container, blt::size_t index) { - /*auto size = current_tree.get_values().size(); - BLT_DEBUG("(depth: %ld) (blocks: %ld) (size: t: %ld m: %ld u: %ld r: %ld) filled: %f%%", - current_tree.get_depth(program), size.blocks, size.total_size_bytes, size.total_no_meta_bytes, size.total_used_bytes, - size.total_remaining_bytes, static_cast(size.total_used_bytes) / static_cast(size.total_no_meta_bytes));*/ - container[index] = current_tree.get_evaluation_value(nullptr); - return container[index] / 1000000000.0; - }, result_container); print_best(); - program.create_next_generation(blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{}, - blt::gp::select_tournament_t{}); + program.create_next_generation(blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{}); program.next_generation(); + program.evaluate_fitness(); } - program.evaluate_fitness([](blt::gp::tree_t& current_tree, decltype(result_container)& container, blt::size_t index) { - container[index] = current_tree.get_evaluation_value(nullptr); - return container[index] / 1000000000.0; - }, result_container); - print_best(); return 0; diff --git a/include/blt/gp/operations.h b/include/blt/gp/operations.h index ddd7c53..f3602f3 100644 --- a/include/blt/gp/operations.h +++ b/include/blt/gp/operations.h @@ -168,11 +168,6 @@ namespace blt::gp }; } - [[nodiscard]] inline constexpr blt::size_t get_argc() const - { - return sizeof...(Args); - } - [[nodiscard]] inline constexpr std::optional get_name() const { return name; diff --git a/include/blt/gp/program.h b/include/blt/gp/program.h index 88550d7..a7cebdd 100644 --- a/include/blt/gp/program.h +++ b/include/blt/gp/program.h @@ -104,9 +104,6 @@ namespace blt::gp auto return_type_id = system.get_type().id(); auto operator_id = blt::gp::operator_id(storage.operators.size()); - auto& operator_list = op.get_argc() == 0 ? storage.terminals : storage.non_terminals; - operator_list[return_type_id].push_back(operator_id); - operator_info info; if constexpr (sizeof...(Args) > 0) @@ -119,6 +116,9 @@ namespace blt::gp ((std::is_same_v, Context> ? info.argc.argc -= 1 : (blt::size_t) nullptr), ...); + auto& operator_list = info.argc.argc == 0 ? storage.terminals : storage.non_terminals; + operator_list[return_type_id].push_back(operator_id); + BLT_ASSERT(info.argc.argc_context - info.argc.argc <= 1 && "Cannot pass multiple context as arguments!"); info.function = op.template make_callable(); @@ -246,15 +246,9 @@ namespace blt::gp system(system), engine(engine), config(config) {} - void generate_population(type_id root_type) - { - current_pop = config.pop_initializer.get().generate( - {*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size}); - } - - template)> + template)> void create_next_generation(Crossover&& crossover_selection, Mutation&& mutation_selection, Reproduction&& reproduction_selection, - Creation_Func& func = default_next_pop_creator) + CreationFunc& func = default_next_pop_creator) { // should already be empty next_pop.clear(); @@ -266,65 +260,30 @@ namespace blt::gp std::forward(reproduction_selection)); } + void evaluate_fitness() + { + evaluate_fitness_func(); + } + /** - * takes in a lambda for the fitness evaluation function (must return a value convertable to double) - * The lambda must accept a tree for evaluation, container for evaluation context, and a index into that container (current tree) + * takes in a reference to a function for the fitness evaluation function (must return a value convertable to double) + * The lambda must accept a tree for evaluation, and an index (current tree) * - * tree_t&, Container&, blt::size_t + * tree_t& current_tree, blt::size_t index_of_tree * * Container must be concurrently accessible from multiple threads using operator[] * - * NOTE: 0 is considered the best, in terms of standardized and adjusted fitness + * NOTE: 0 is considered the best, in terms of standardized fitness */ - template - void evaluate_fitness(Callable&& fitness_function, Container& result_storage, bool larger_better = true) + template + void generate_population(type_id root_type, FitnessFunc& fitness_function) { - for (const auto& ind : blt::enumerate(current_pop.get_individuals())) - ind.second.raw_fitness = static_cast(fitness_function(ind.second.tree, result_storage, ind.first)); - double min = 0; - double max = 0; - for (auto& ind : current_pop.get_individuals()) - { - if (ind.raw_fitness < min) - min = ind.raw_fitness; - if (ind.raw_fitness > max) - max = ind.raw_fitness; - } - - double overall_fitness = 0; - double best_fitness = 2; - double worst_fitness = 0; - individual* best = nullptr; - individual* worst = nullptr; - - auto diff = -min; - for (auto& ind : current_pop.get_individuals()) - { - // make standardized fitness [0, +inf) - ind.standardized_fitness = ind.raw_fitness + diff; - //BLT_WARN(ind.standardized_fitness); - if (larger_better) - ind.standardized_fitness = (max + diff) - ind.standardized_fitness; - //BLT_WARN(ind.standardized_fitness); - //ind.adjusted_fitness = (1.0 / (1.0 + ind.standardized_fitness)); - - if (ind.standardized_fitness > worst_fitness) - { - worst_fitness = ind.standardized_fitness; - worst = &ind; - } - - if (ind.standardized_fitness < best_fitness) - { - best_fitness = ind.standardized_fitness; - best = &ind; - } - - overall_fitness += ind.standardized_fitness / static_cast(config.population_size); - } - - current_stats = {overall_fitness, overall_fitness, best_fitness, worst_fitness, best, - worst}; + current_pop = config.pop_initializer.get().generate( + {*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size}); + evaluate_fitness_func = [this, &fitness_function]() { + evaluate_fitness_internal(fitness_function); + }; + evaluate_fitness_func(); } void next_generation() @@ -347,10 +306,10 @@ namespace blt::gp values.reserve(current_pop.get_individuals().size()); for (const auto& ind : blt::enumerate(current_pop.get_individuals())) - values.emplace_back(ind.first, ind.second.standardized_fitness); + values.emplace_back(ind.first, ind.second.fitness.adjusted_fitness); std::sort(values.begin(), values.end(), [](const auto& a, const auto& b) { - return a.second < b.second; + return a.second > b.second; }); for (blt::size_t i = 0; i < size; i++) @@ -360,9 +319,23 @@ namespace blt::gp } template - std::array, size> get_best() + auto get_best_trees() { - return convert_array(get_best_indexes(), std::make_integer_sequence()); + return convert_array, size>>(get_best_indexes(), + [this](auto&& arr, blt::size_t index) -> tree_t& { + return current_pop.get_individuals()[arr[index]].tree; + }, + std::make_integer_sequence()); + } + + template + auto get_best_individuals() + { + return convert_array, size>>(get_best_indexes(), + [this](auto&& arr, blt::size_t index) -> individual& { + return current_pop.get_individuals()[arr[index]]; + }, + std::make_integer_sequence()); } [[nodiscard]] bool should_terminate() const @@ -452,16 +425,87 @@ namespace blt::gp random_t engine; prog_config_t config; + // for convenience, shouldn't decrease performance too much + std::function evaluate_fitness_func; + inline selector_args get_selector_args() { return {*this, next_pop, current_pop, current_stats, config, engine}; } - template - inline std::array, size> convert_array(std::array&& arr, - std::integer_sequence) + template + inline Return convert_array(std::array&& arr, Accessor&& accessor, + std::integer_sequence) { - return {current_pop.get_individuals()[arr[indexes]].tree...}; + return Return{accessor(arr, indexes)...}; + } + + template + void evaluate_fitness_internal(Callable&& fitness_function) + { + current_stats = {}; + for (const auto& ind : blt::enumerate(current_pop.get_individuals())) + { + fitness_function(ind.second.tree, ind.second.fitness, ind.first); + if (ind.second.fitness.adjusted_fitness > current_stats.best_fitness) + { + current_stats.best_fitness = ind.second.fitness.adjusted_fitness; + current_stats.best_individual = &ind.second; + } + + if (ind.second.fitness.adjusted_fitness < current_stats.worst_fitness) + { + current_stats.worst_fitness = ind.second.fitness.adjusted_fitness; + current_stats.worst_individual = &ind.second; + } + + current_stats.overall_fitness += ind.second.fitness.adjusted_fitness; + } + current_stats.average_fitness /= static_cast(config.population_size); +// double min = 0; +// double max = 0; +// for (auto& ind : current_pop.get_individuals()) +// { +// if (ind.raw_fitness < min) +// min = ind.raw_fitness; +// if (ind.raw_fitness > max) +// max = ind.raw_fitness; +// } +// +// double overall_fitness = 0; +// double best_fitness = 2; +// double worst_fitness = 0; +// individual* best = nullptr; +// individual* worst = nullptr; +// +// auto diff = -min; +// for (auto& ind : current_pop.get_individuals()) +// { +// // make standardized fitness [0, +inf) +// ind.standardized_fitness = ind.raw_fitness + diff; +// //BLT_WARN(ind.standardized_fitness); +// if (larger_better) +// ind.standardized_fitness = (max + diff) - ind.standardized_fitness; +// //BLT_WARN(ind.standardized_fitness); +// //ind.adjusted_fitness = (1.0 / (1.0 + ind.standardized_fitness)); +// +// if (ind.standardized_fitness > worst_fitness) +// { +// worst_fitness = ind.standardized_fitness; +// worst = &ind; +// } +// +// if (ind.standardized_fitness < best_fitness) +// { +// best_fitness = ind.standardized_fitness; +// best = &ind; +// } +// +// overall_fitness += ind.standardized_fitness / static_cast(config.population_size); +// } +// +// current_stats = {overall_fitness, overall_fitness, best_fitness, worst_fitness, best, +// worst}; } }; diff --git a/include/blt/gp/selection.h b/include/blt/gp/selection.h index df3d91b..46adcfc 100644 --- a/include/blt/gp/selection.h +++ b/include/blt/gp/selection.h @@ -52,14 +52,14 @@ namespace blt::gp std::vector> values; for (blt::size_t i = 0; i < config.elites; i++) - values.emplace_back(i, current_pop.get_individuals()[i].standardized_fitness); + values.emplace_back(i, current_pop.get_individuals()[i].fitness.adjusted_fitness); for (const auto& ind : blt::enumerate(current_pop.get_individuals())) { for (blt::size_t i = 0; i < config.elites; i++) { // BLT_INFO("%lf < %lf? // %lf", ind.second.standardized_fitness, values[i].second, ind.second.raw_fitness); - if (ind.second.standardized_fitness < values[i].second) + if (ind.second.fitness.adjusted_fitness > values[i].second) { bool doesnt_contain = true; for (blt::size_t j = 0; j < config.elites; j++) @@ -68,7 +68,7 @@ namespace blt::gp doesnt_contain = false; } if (doesnt_contain) - values[i] = {ind.first, ind.second.standardized_fitness}; + values[i] = {ind.first, ind.second.fitness.adjusted_fitness}; break; } } @@ -185,6 +185,8 @@ namespace blt::gp void pre_process(gp_program& program, population_t& pop, population_stats& stats) final; tree_t& select(gp_program& program, population_t& pop, population_stats& stats) final; + private: + std::vector probabilities; }; } diff --git a/include/blt/gp/tree.h b/include/blt/gp/tree.h index f737aaa..95730f6 100644 --- a/include/blt/gp/tree.h +++ b/include/blt/gp/tree.h @@ -111,13 +111,18 @@ namespace blt::gp blt::gp::stack_allocator values; }; + struct fitness_t + { + double raw_fitness = 0; + double standardized_fitness = 0; + double adjusted_fitness = 0; + blt::i64 hits = 0; + }; + struct individual { tree_t tree; - double raw_fitness = 0; - double standardized_fitness = 0; - //double adjusted_fitness = 0; - double probability = 0; + fitness_t fitness; individual() = default; @@ -140,8 +145,8 @@ namespace blt::gp { double overall_fitness = 0; double average_fitness = 0; - double best_fitness = 1; - double worst_fitness = 0; + double best_fitness = 0; + double worst_fitness = 1; // these will never be null unless your pop is not initialized / fitness eval was not called! individual* best_individual = nullptr; individual* worst_individual = nullptr; diff --git a/lib/blt b/lib/blt index 4de3aeb..456eeb1 160000 --- a/lib/blt +++ b/lib/blt @@ -1 +1 @@ -Subproject commit 4de3aeb87c78a13f8e493e48477220084610c076 +Subproject commit 456eeb12ac416a4ac4b5e72213f5a93fa576607c diff --git a/src/selection.cpp b/src/selection.cpp index 7a159ae..9ac667c 100644 --- a/src/selection.cpp +++ b/src/selection.cpp @@ -24,13 +24,13 @@ namespace blt::gp tree_t& select_best_t::select(gp_program&, population_t& pop, population_stats&) { auto& first = pop.get_individuals()[0]; - double best_fitness = first.standardized_fitness; + double best_fitness = first.fitness.adjusted_fitness; tree_t* tree = &first.tree; for (auto& ind : pop.get_individuals()) { - if (ind.standardized_fitness < best_fitness) + if (ind.fitness.adjusted_fitness > best_fitness) { - best_fitness = ind.standardized_fitness; + best_fitness = ind.fitness.adjusted_fitness; tree = &ind.tree; } } @@ -40,13 +40,13 @@ namespace blt::gp tree_t& select_worst_t::select(gp_program&, population_t& pop, population_stats&) { auto& first = pop.get_individuals()[0]; - double worst_fitness = first.standardized_fitness; + double worst_fitness = first.fitness.adjusted_fitness; tree_t* tree = &first.tree; for (auto& ind : pop.get_individuals()) { - if (ind.standardized_fitness > worst_fitness) + if (ind.fitness.adjusted_fitness < worst_fitness) { - worst_fitness = ind.standardized_fitness; + worst_fitness = ind.fitness.adjusted_fitness; tree = &ind.tree; } } @@ -63,13 +63,13 @@ namespace blt::gp auto& first = pop.get_individuals()[program.get_random().get_size_t(0ul, pop.get_individuals().size())]; individual* ind = &first; - double best_guy = first.standardized_fitness; + double best_guy = first.fitness.adjusted_fitness; for (blt::size_t i = 0; i < selection_size - 1; i++) { auto& sel = pop.get_individuals()[program.get_random().get_size_t(0ul, pop.get_individuals().size())]; - if (sel.standardized_fitness < best_guy) + if (sel.fitness.adjusted_fitness > best_guy) { - best_guy = sel.standardized_fitness; + best_guy = sel.fitness.adjusted_fitness; ind = &sel; } } @@ -82,9 +82,9 @@ namespace blt::gp auto choice = program.get_random().get_double(); for (const auto& ind : blt::enumerate(pop)) { - if (ind.first == pop.get_individuals().size()-1) + if (ind.first == 0) return ind.second.tree; - if (choice > ind.second.probability && pop.get_individuals()[ind.first+1].probability < choice) + if (choice >= probabilities[ind.first] && choice >= probabilities[ind.first - 1]) return ind.second.tree; } BLT_WARN("Unable to find individual with fitness proportionate. This should not be a possible code path!"); @@ -94,11 +94,13 @@ namespace blt::gp void select_fitness_proportionate_t::pre_process(gp_program&, population_t& pop, population_stats& stats) { + probabilities.clear(); double sum_of_prob = 0; for (auto& ind : pop) { - ind.probability = sum_of_prob + (ind.standardized_fitness / stats.overall_fitness); - sum_of_prob += ind.probability; + auto prob = (ind.fitness.adjusted_fitness / stats.overall_fitness); + probabilities.push_back(sum_of_prob + prob); + sum_of_prob += prob; } } } \ No newline at end of file diff --git a/src/tree.cpp b/src/tree.cpp index 5eae548..035ddc2 100644 --- a/src/tree.cpp +++ b/src/tree.cpp @@ -107,7 +107,10 @@ namespace blt::gp if (print_literals) { create_indent(out, indent, pretty_print); - program.get_print_func(v.id)(out, reversed); + if (program.is_static(v.id)) + program.get_print_func(v.id)(out, reversed); + else + out << name; out << return_type << end_indent(pretty_print); } else create_indent(out, indent, pretty_print) << name << return_type << end_indent(pretty_print);