rework fitness

thread
Brett 2024-07-11 04:11:24 -04:00
parent ef46bc3189
commit d6ebcd935c
10 changed files with 298 additions and 120 deletions

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25)
project(blt-gp VERSION 0.0.58)
project(blt-gp VERSION 0.0.59)
include(CTest)
@ -79,5 +79,6 @@ if (${BUILD_EXAMPLES})
blt_add_example(blt-gp5 examples/gp_test_5.cpp)
blt_add_example(blt-gp6 examples/gp_test_6.cpp)
blt_add_example(blt-gp7 examples/gp_test_7.cpp)
blt_add_example(blt-symbolic-regression examples/gp_symbolic_regression_example.cpp)
endif ()

View File

@ -0,0 +1,130 @@
/*
* <Short Description>
* Copyright (C) 2024 Brett Terpstra
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <blt/gp/program.h>
#include <blt/gp/tree.h>
#include <blt/std/logging.h>
#include <iostream>
static constexpr long SEED = 41912;
struct context
{
float x, y;
};
std::array<context, 200> fitness_cases;
blt::gp::prog_config_t config = blt::gp::prog_config_t()
.set_initial_min_tree_size(2)
.set_initial_max_tree_size(6)
.set_elite_count(0)
.set_max_generations(50)
.set_pop_size(500);
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}, config); // NOLINT
blt::gp::operation_t add([](float a, float b) { return a + b; }, "add");
blt::gp::operation_t sub([](float a, float b) { return a - b; }, "sub");
blt::gp::operation_t mul([](float a, float b) { return a * b; }, "mul");
blt::gp::operation_t pro_div([](float a, float b) { return b == 0.0f ? 1.0f : a / b; }, "div");
blt::gp::operation_t op_sin([](float a) { return std::sin(a); }, "sin");
blt::gp::operation_t op_cos([](float a) { return std::cos(a); }, "cos");
blt::gp::operation_t op_exp([](float a) { return std::exp(a); }, "exp");
blt::gp::operation_t op_log([](float a) { return a == 0.0f ? 0.0f : std::log(a); }, "log");
blt::gp::operation_t lit([]() {
return program.get_random().get_float(-320.0f, 320.0f);
}, "lit");
blt::gp::operation_t op_x([](const context& context) {
return context.x;
}, "x");
constexpr auto fitness_function = [](blt::gp::tree_t& current_tree, blt::gp::fitness_t& fitness, blt::size_t index) {
constexpr double value_cutoff = 1.e15;
for (auto& fitness_case : fitness_cases)
{
auto diff = std::abs(fitness_case.y - current_tree.get_evaluation_value<float>(&fitness_case));
if (diff < value_cutoff)
{
fitness.raw_fitness += diff;
if (diff < 0.01)
fitness.hits++;
} else
fitness.raw_fitness += value_cutoff;
}
fitness.standardized_fitness = fitness.raw_fitness;
fitness.adjusted_fitness = 1.0 / (1.0 + fitness.standardized_fitness);
//BLT_TRACE("fitness: %lf raw: %lf", fitness.adjusted_fitness, fitness.raw_fitness);
};
float example_function(float x)
{
return x * x * x * x + x * x * x + x * x + x;
}
int main()
{
for (auto& fitness_case : fitness_cases)
{
constexpr float range = 10;
constexpr float half_range = range / 2.0;
auto x = program.get_random().get_float(-half_range, half_range);
auto y = example_function(x);
fitness_case = {x, y};
}
type_system.register_type<float>();
blt::gp::operator_builder<context> builder{type_system};
builder.add_operator(add);
builder.add_operator(sub);
builder.add_operator(mul);
builder.add_operator(pro_div);
builder.add_operator(op_sin);
builder.add_operator(op_cos);
builder.add_operator(op_exp);
builder.add_operator(op_log);
builder.add_operator(lit, true);
builder.add_operator(op_x);
program.set_operations(builder.build());
program.generate_population(type_system.get_type<float>().id(), fitness_function);
while (!program.should_terminate())
{
program.create_next_generation(blt::gp::select_fitness_proportionate_t{}, blt::gp::select_fitness_proportionate_t{}, blt::gp::select_fitness_proportionate_t{});
program.next_generation();
program.evaluate_fitness();
}
auto best = program.get_best_individuals<3>();
BLT_INFO("Best approximations:");
for (auto& i_ref : best)
{
auto& i = i_ref.get();
BLT_DEBUG("Fitness: %lf, stand: %lf, raw: %lf", i.fitness.adjusted_fitness, i.fitness.standardized_fitness, i.fitness.raw_fitness);
i.tree.print(program, std::cout);
std::cout << "\n";
}
return 0;
}

View File

@ -16,14 +16,12 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <blt/gp/program.h>
#include <blt/gp/generators.h>
#include <blt/gp/tree.h>
#include <blt/std/logging.h>
#include <blt/gp/transformers.h>
static constexpr long SEED = 41912;
blt::gp::prog_config_t config = blt::gp::prog_config_t().set_elite_count(1);
blt::gp::prog_config_t config = blt::gp::prog_config_t().set_elite_count(2);
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}, config); // NOLINT
@ -72,6 +70,17 @@ void print_best()
//BLT_TRACE(small);
}
constexpr auto fitness_function = [](blt::gp::tree_t& current_tree, blt::gp::fitness_t& fitness, blt::size_t index) {
/*auto size = current_tree.get_values().size();
BLT_DEBUG("(depth: %ld) (blocks: %ld) (size: t: %ld m: %ld u: %ld r: %ld) filled: %f%%",
current_tree.get_depth(program), size.blocks, size.total_size_bytes, size.total_no_meta_bytes, size.total_used_bytes,
size.total_remaining_bytes, static_cast<double>(size.total_used_bytes) / static_cast<double>(size.total_no_meta_bytes));*/
result_container[index] = current_tree.get_evaluation_value<float>(nullptr);
fitness.raw_fitness = result_container[index] / 1000000000.0;
fitness.standardized_fitness = fitness.raw_fitness;
fitness.adjusted_fitness = 1.0 - (1.0 / (1.0 + fitness.raw_fitness));
};
/**
* This is a test using multiple types with blt::gp
*/
@ -100,29 +109,16 @@ int main()
program.set_operations(builder.build());
program.generate_population(type_system.get_type<float>().id());
program.generate_population(type_system.get_type<float>().id(), fitness_function);
while (!program.should_terminate())
{
program.evaluate_fitness([](blt::gp::tree_t& current_tree, decltype(result_container)& container, blt::size_t index) {
/*auto size = current_tree.get_values().size();
BLT_DEBUG("(depth: %ld) (blocks: %ld) (size: t: %ld m: %ld u: %ld r: %ld) filled: %f%%",
current_tree.get_depth(program), size.blocks, size.total_size_bytes, size.total_no_meta_bytes, size.total_used_bytes,
size.total_remaining_bytes, static_cast<double>(size.total_used_bytes) / static_cast<double>(size.total_no_meta_bytes));*/
container[index] = current_tree.get_evaluation_value<float>(nullptr);
return container[index] / 1000000000.0;
}, result_container);
print_best();
program.create_next_generation(blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{},
blt::gp::select_tournament_t{});
program.create_next_generation(blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{});
program.next_generation();
program.evaluate_fitness();
}
program.evaluate_fitness([](blt::gp::tree_t& current_tree, decltype(result_container)& container, blt::size_t index) {
container[index] = current_tree.get_evaluation_value<float>(nullptr);
return container[index] / 1000000000.0;
}, result_container);
print_best();
return 0;

View File

@ -168,11 +168,6 @@ namespace blt::gp
};
}
[[nodiscard]] inline constexpr blt::size_t get_argc() const
{
return sizeof...(Args);
}
[[nodiscard]] inline constexpr std::optional<std::string_view> get_name() const
{
return name;

View File

@ -104,9 +104,6 @@ namespace blt::gp
auto return_type_id = system.get_type<Return>().id();
auto operator_id = blt::gp::operator_id(storage.operators.size());
auto& operator_list = op.get_argc() == 0 ? storage.terminals : storage.non_terminals;
operator_list[return_type_id].push_back(operator_id);
operator_info info;
if constexpr (sizeof...(Args) > 0)
@ -119,6 +116,9 @@ namespace blt::gp
((std::is_same_v<detail::remove_cv_ref<Args>, Context> ? info.argc.argc -= 1 : (blt::size_t) nullptr), ...);
auto& operator_list = info.argc.argc == 0 ? storage.terminals : storage.non_terminals;
operator_list[return_type_id].push_back(operator_id);
BLT_ASSERT(info.argc.argc_context - info.argc.argc <= 1 && "Cannot pass multiple context as arguments!");
info.function = op.template make_callable<Context>();
@ -246,15 +246,9 @@ namespace blt::gp
system(system), engine(engine), config(config)
{}
void generate_population(type_id root_type)
{
current_pop = config.pop_initializer.get().generate(
{*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size});
}
template<typename Crossover, typename Mutation, typename Reproduction, typename Creation_Func = decltype(default_next_pop_creator<Crossover, Mutation, Reproduction>)>
template<typename Crossover, typename Mutation, typename Reproduction, typename CreationFunc = decltype(default_next_pop_creator<Crossover, Mutation, Reproduction>)>
void create_next_generation(Crossover&& crossover_selection, Mutation&& mutation_selection, Reproduction&& reproduction_selection,
Creation_Func& func = default_next_pop_creator<Crossover, Mutation, Reproduction>)
CreationFunc& func = default_next_pop_creator<Crossover, Mutation, Reproduction>)
{
// should already be empty
next_pop.clear();
@ -266,65 +260,30 @@ namespace blt::gp
std::forward<Reproduction>(reproduction_selection));
}
void evaluate_fitness()
{
evaluate_fitness_func();
}
/**
* takes in a lambda for the fitness evaluation function (must return a value convertable to double)
* The lambda must accept a tree for evaluation, container for evaluation context, and a index into that container (current tree)
* takes in a reference to a function for the fitness evaluation function (must return a value convertable to double)
* The lambda must accept a tree for evaluation, and an index (current tree)
*
* tree_t&, Container&, blt::size_t
* tree_t& current_tree, blt::size_t index_of_tree
*
* Container must be concurrently accessible from multiple threads using operator[]
*
* NOTE: 0 is considered the best, in terms of standardized and adjusted fitness
* NOTE: 0 is considered the best, in terms of standardized fitness
*/
template<typename Container, typename Callable>
void evaluate_fitness(Callable&& fitness_function, Container& result_storage, bool larger_better = true)
template<typename FitnessFunc>
void generate_population(type_id root_type, FitnessFunc& fitness_function)
{
for (const auto& ind : blt::enumerate(current_pop.get_individuals()))
ind.second.raw_fitness = static_cast<double>(fitness_function(ind.second.tree, result_storage, ind.first));
double min = 0;
double max = 0;
for (auto& ind : current_pop.get_individuals())
{
if (ind.raw_fitness < min)
min = ind.raw_fitness;
if (ind.raw_fitness > max)
max = ind.raw_fitness;
}
double overall_fitness = 0;
double best_fitness = 2;
double worst_fitness = 0;
individual* best = nullptr;
individual* worst = nullptr;
auto diff = -min;
for (auto& ind : current_pop.get_individuals())
{
// make standardized fitness [0, +inf)
ind.standardized_fitness = ind.raw_fitness + diff;
//BLT_WARN(ind.standardized_fitness);
if (larger_better)
ind.standardized_fitness = (max + diff) - ind.standardized_fitness;
//BLT_WARN(ind.standardized_fitness);
//ind.adjusted_fitness = (1.0 / (1.0 + ind.standardized_fitness));
if (ind.standardized_fitness > worst_fitness)
{
worst_fitness = ind.standardized_fitness;
worst = &ind;
}
if (ind.standardized_fitness < best_fitness)
{
best_fitness = ind.standardized_fitness;
best = &ind;
}
overall_fitness += ind.standardized_fitness / static_cast<double>(config.population_size);
}
current_stats = {overall_fitness, overall_fitness, best_fitness, worst_fitness, best,
worst};
current_pop = config.pop_initializer.get().generate(
{*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size});
evaluate_fitness_func = [this, &fitness_function]() {
evaluate_fitness_internal(fitness_function);
};
evaluate_fitness_func();
}
void next_generation()
@ -347,10 +306,10 @@ namespace blt::gp
values.reserve(current_pop.get_individuals().size());
for (const auto& ind : blt::enumerate(current_pop.get_individuals()))
values.emplace_back(ind.first, ind.second.standardized_fitness);
values.emplace_back(ind.first, ind.second.fitness.adjusted_fitness);
std::sort(values.begin(), values.end(), [](const auto& a, const auto& b) {
return a.second < b.second;
return a.second > b.second;
});
for (blt::size_t i = 0; i < size; i++)
@ -360,9 +319,23 @@ namespace blt::gp
}
template<blt::size_t size>
std::array<std::reference_wrapper<tree_t>, size> get_best()
auto get_best_trees()
{
return convert_array(get_best_indexes<size>(), std::make_integer_sequence<blt::size_t, size>());
return convert_array<std::array<std::reference_wrapper<tree_t>, size>>(get_best_indexes<size>(),
[this](auto&& arr, blt::size_t index) -> tree_t& {
return current_pop.get_individuals()[arr[index]].tree;
},
std::make_integer_sequence<blt::size_t, size>());
}
template<blt::size_t size>
auto get_best_individuals()
{
return convert_array<std::array<std::reference_wrapper<individual>, size>>(get_best_indexes<size>(),
[this](auto&& arr, blt::size_t index) -> individual& {
return current_pop.get_individuals()[arr[index]];
},
std::make_integer_sequence<blt::size_t, size>());
}
[[nodiscard]] bool should_terminate() const
@ -452,16 +425,87 @@ namespace blt::gp
random_t engine;
prog_config_t config;
// for convenience, shouldn't decrease performance too much
std::function<void()> evaluate_fitness_func;
inline selector_args get_selector_args()
{
return {*this, next_pop, current_pop, current_stats, config, engine};
}
template<blt::size_t size, blt::size_t... indexes>
inline std::array<std::reference_wrapper<tree_t>, size> convert_array(std::array<blt::size_t, size>&& arr,
std::integer_sequence<blt::size_t, indexes...>)
template<typename Return, blt::size_t size, typename Accessor, blt::size_t... indexes>
inline Return convert_array(std::array<blt::size_t, size>&& arr, Accessor&& accessor,
std::integer_sequence<blt::size_t, indexes...>)
{
return {current_pop.get_individuals()[arr[indexes]].tree...};
return Return{accessor(arr, indexes)...};
}
template<typename Callable>
void evaluate_fitness_internal(Callable&& fitness_function)
{
current_stats = {};
for (const auto& ind : blt::enumerate(current_pop.get_individuals()))
{
fitness_function(ind.second.tree, ind.second.fitness, ind.first);
if (ind.second.fitness.adjusted_fitness > current_stats.best_fitness)
{
current_stats.best_fitness = ind.second.fitness.adjusted_fitness;
current_stats.best_individual = &ind.second;
}
if (ind.second.fitness.adjusted_fitness < current_stats.worst_fitness)
{
current_stats.worst_fitness = ind.second.fitness.adjusted_fitness;
current_stats.worst_individual = &ind.second;
}
current_stats.overall_fitness += ind.second.fitness.adjusted_fitness;
}
current_stats.average_fitness /= static_cast<double>(config.population_size);
// double min = 0;
// double max = 0;
// for (auto& ind : current_pop.get_individuals())
// {
// if (ind.raw_fitness < min)
// min = ind.raw_fitness;
// if (ind.raw_fitness > max)
// max = ind.raw_fitness;
// }
//
// double overall_fitness = 0;
// double best_fitness = 2;
// double worst_fitness = 0;
// individual* best = nullptr;
// individual* worst = nullptr;
//
// auto diff = -min;
// for (auto& ind : current_pop.get_individuals())
// {
// // make standardized fitness [0, +inf)
// ind.standardized_fitness = ind.raw_fitness + diff;
// //BLT_WARN(ind.standardized_fitness);
// if (larger_better)
// ind.standardized_fitness = (max + diff) - ind.standardized_fitness;
// //BLT_WARN(ind.standardized_fitness);
// //ind.adjusted_fitness = (1.0 / (1.0 + ind.standardized_fitness));
//
// if (ind.standardized_fitness > worst_fitness)
// {
// worst_fitness = ind.standardized_fitness;
// worst = &ind;
// }
//
// if (ind.standardized_fitness < best_fitness)
// {
// best_fitness = ind.standardized_fitness;
// best = &ind;
// }
//
// overall_fitness += ind.standardized_fitness / static_cast<double>(config.population_size);
// }
//
// current_stats = {overall_fitness, overall_fitness, best_fitness, worst_fitness, best,
// worst};
}
};

View File

@ -52,14 +52,14 @@ namespace blt::gp
std::vector<std::pair<std::size_t, double>> values;
for (blt::size_t i = 0; i < config.elites; i++)
values.emplace_back(i, current_pop.get_individuals()[i].standardized_fitness);
values.emplace_back(i, current_pop.get_individuals()[i].fitness.adjusted_fitness);
for (const auto& ind : blt::enumerate(current_pop.get_individuals()))
{
for (blt::size_t i = 0; i < config.elites; i++)
{
// BLT_INFO("%lf < %lf? // %lf", ind.second.standardized_fitness, values[i].second, ind.second.raw_fitness);
if (ind.second.standardized_fitness < values[i].second)
if (ind.second.fitness.adjusted_fitness > values[i].second)
{
bool doesnt_contain = true;
for (blt::size_t j = 0; j < config.elites; j++)
@ -68,7 +68,7 @@ namespace blt::gp
doesnt_contain = false;
}
if (doesnt_contain)
values[i] = {ind.first, ind.second.standardized_fitness};
values[i] = {ind.first, ind.second.fitness.adjusted_fitness};
break;
}
}
@ -185,6 +185,8 @@ namespace blt::gp
void pre_process(gp_program& program, population_t& pop, population_stats& stats) final;
tree_t& select(gp_program& program, population_t& pop, population_stats& stats) final;
private:
std::vector<double> probabilities;
};
}

View File

@ -111,13 +111,18 @@ namespace blt::gp
blt::gp::stack_allocator values;
};
struct fitness_t
{
double raw_fitness = 0;
double standardized_fitness = 0;
double adjusted_fitness = 0;
blt::i64 hits = 0;
};
struct individual
{
tree_t tree;
double raw_fitness = 0;
double standardized_fitness = 0;
//double adjusted_fitness = 0;
double probability = 0;
fitness_t fitness;
individual() = default;
@ -140,8 +145,8 @@ namespace blt::gp
{
double overall_fitness = 0;
double average_fitness = 0;
double best_fitness = 1;
double worst_fitness = 0;
double best_fitness = 0;
double worst_fitness = 1;
// these will never be null unless your pop is not initialized / fitness eval was not called!
individual* best_individual = nullptr;
individual* worst_individual = nullptr;

@ -1 +1 @@
Subproject commit 4de3aeb87c78a13f8e493e48477220084610c076
Subproject commit 456eeb12ac416a4ac4b5e72213f5a93fa576607c

View File

@ -24,13 +24,13 @@ namespace blt::gp
tree_t& select_best_t::select(gp_program&, population_t& pop, population_stats&)
{
auto& first = pop.get_individuals()[0];
double best_fitness = first.standardized_fitness;
double best_fitness = first.fitness.adjusted_fitness;
tree_t* tree = &first.tree;
for (auto& ind : pop.get_individuals())
{
if (ind.standardized_fitness < best_fitness)
if (ind.fitness.adjusted_fitness > best_fitness)
{
best_fitness = ind.standardized_fitness;
best_fitness = ind.fitness.adjusted_fitness;
tree = &ind.tree;
}
}
@ -40,13 +40,13 @@ namespace blt::gp
tree_t& select_worst_t::select(gp_program&, population_t& pop, population_stats&)
{
auto& first = pop.get_individuals()[0];
double worst_fitness = first.standardized_fitness;
double worst_fitness = first.fitness.adjusted_fitness;
tree_t* tree = &first.tree;
for (auto& ind : pop.get_individuals())
{
if (ind.standardized_fitness > worst_fitness)
if (ind.fitness.adjusted_fitness < worst_fitness)
{
worst_fitness = ind.standardized_fitness;
worst_fitness = ind.fitness.adjusted_fitness;
tree = &ind.tree;
}
}
@ -63,13 +63,13 @@ namespace blt::gp
auto& first = pop.get_individuals()[program.get_random().get_size_t(0ul, pop.get_individuals().size())];
individual* ind = &first;
double best_guy = first.standardized_fitness;
double best_guy = first.fitness.adjusted_fitness;
for (blt::size_t i = 0; i < selection_size - 1; i++)
{
auto& sel = pop.get_individuals()[program.get_random().get_size_t(0ul, pop.get_individuals().size())];
if (sel.standardized_fitness < best_guy)
if (sel.fitness.adjusted_fitness > best_guy)
{
best_guy = sel.standardized_fitness;
best_guy = sel.fitness.adjusted_fitness;
ind = &sel;
}
}
@ -82,9 +82,9 @@ namespace blt::gp
auto choice = program.get_random().get_double();
for (const auto& ind : blt::enumerate(pop))
{
if (ind.first == pop.get_individuals().size()-1)
if (ind.first == 0)
return ind.second.tree;
if (choice > ind.second.probability && pop.get_individuals()[ind.first+1].probability < choice)
if (choice >= probabilities[ind.first] && choice >= probabilities[ind.first - 1])
return ind.second.tree;
}
BLT_WARN("Unable to find individual with fitness proportionate. This should not be a possible code path!");
@ -94,11 +94,13 @@ namespace blt::gp
void select_fitness_proportionate_t::pre_process(gp_program&, population_t& pop, population_stats& stats)
{
probabilities.clear();
double sum_of_prob = 0;
for (auto& ind : pop)
{
ind.probability = sum_of_prob + (ind.standardized_fitness / stats.overall_fitness);
sum_of_prob += ind.probability;
auto prob = (ind.fitness.adjusted_fitness / stats.overall_fitness);
probabilities.push_back(sum_of_prob + prob);
sum_of_prob += prob;
}
}
}

View File

@ -107,7 +107,10 @@ namespace blt::gp
if (print_literals)
{
create_indent(out, indent, pretty_print);
program.get_print_func(v.id)(out, reversed);
if (program.is_static(v.id))
program.get_print_func(v.id)(out, reversed);
else
out << name;
out << return_type << end_indent(pretty_print);
} else
create_indent(out, indent, pretty_print) << name << return_type << end_indent(pretty_print);