everything slow!

thread
Brett 2024-07-11 21:14:23 -04:00
parent 63d6e89136
commit ee3dc8d766
14 changed files with 36427 additions and 93 deletions

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25)
project(blt-gp VERSION 0.0.60)
project(blt-gp VERSION 0.0.61)
include(CTest)

36207
callgrind.out.14232 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -16,9 +16,11 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <blt/gp/program.h>
#include <blt/profiling/profiler_v2.h>
#include <blt/gp/tree.h>
#include <blt/std/logging.h>
#include <iostream>
#include <thread>
static constexpr long SEED = 41912;
@ -34,10 +36,11 @@ blt::gp::prog_config_t config = blt::gp::prog_config_t()
.set_initial_max_tree_size(6)
.set_elite_count(0)
.set_max_generations(50)
.set_pop_size(500);
.set_pop_size(500)
.set_thread_count(0);
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}, config); // NOLINT
blt::gp::gp_program program{type_system, SEED, config};
blt::gp::operation_t add([](float a, float b) { return a + b; }, "add");
blt::gp::operation_t sub([](float a, float b) { return a - b; }, "sub");
@ -55,7 +58,7 @@ blt::gp::operation_t op_x([](const context& context) {
return context.x;
}, "x");
constexpr auto fitness_function = [](blt::gp::tree_t& current_tree, blt::gp::fitness_t& fitness, blt::size_t index) {
constexpr auto fitness_function = [](blt::gp::tree_t& current_tree, blt::gp::fitness_t& fitness, blt::size_t) {
constexpr double value_cutoff = 1.e15;
for (auto& fitness_case : fitness_cases)
{
@ -80,6 +83,7 @@ float example_function(float x)
int main()
{
BLT_START_INTERVAL("Symbolic Regression", "Main");
for (auto& fitness_case : fitness_cases)
{
constexpr float range = 10;
@ -110,11 +114,17 @@ int main()
while (!program.should_terminate())
{
BLT_START_INTERVAL("Symbolic Regression", "Gen");
program.create_next_generation(blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{}, blt::gp::select_tournament_t{});
BLT_END_INTERVAL("Symbolic Regression", "Gen");
BLT_START_INTERVAL("Symbolic Regression", "Fitness");
program.next_generation();
program.evaluate_fitness();
BLT_END_INTERVAL("Symbolic Regression", "Fitness");
}
BLT_END_INTERVAL("Symbolic Regression", "Main");
auto best = program.get_best_individuals<3>();
BLT_INFO("Best approximations:");
@ -125,8 +135,15 @@ int main()
i.tree.print(program, std::cout);
std::cout << "\n";
}
BLT_INFO("");
auto& stats = program.get_population_stats();
BLT_INFO("Stats:");
BLT_INFO("Average fitness: %lf", stats.average_fitness.load());
BLT_INFO("Best fitness: %lf", stats.best_fitness.load());
BLT_INFO("Worst fitness: %lf", stats.worst_fitness.load());
BLT_INFO("Overall fitness: %lf", stats.overall_fitness.load());
// TODO: make stats helper
BLT_PRINT_PROFILE("Symbolic Regression", blt::PRINT_CYCLES | blt::PRINT_THREAD | blt::PRINT_WALL);
return 0;
}

View File

@ -24,7 +24,7 @@
static constexpr long SEED = 41912;
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}); // NOLINT
blt::gp::gp_program program(type_system, SEED); // NOLINT
blt::gp::operation_t add([](float a, float b) {
BLT_TRACE("a: %f + b: %f = %f", a, b, a + b);

View File

@ -23,7 +23,7 @@
static constexpr long SEED = 41912;
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}); // NOLINT
blt::gp::gp_program program(type_system, SEED); // NOLINT
blt::gp::operation_t add([](float a, float b) { return a + b; });
blt::gp::operation_t sub([](float a, float b) { return a - b; });

View File

@ -23,7 +23,7 @@
static constexpr long SEED = 41912;
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}); // NOLINT
blt::gp::gp_program program(type_system, SEED); // NOLINT
blt::gp::operation_t add([](float a, float b) { return a + b; });
blt::gp::operation_t sub([](float a, float b) { return a - b; });

View File

@ -44,7 +44,7 @@ static constexpr long SEED = 41912;
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}); // NOLINT
blt::gp::gp_program program(type_system, SEED); // NOLINT
blt::gp::operation_t add([](float a, float b) { return a + b; }, "add"); // 0
blt::gp::operation_t sub([](float a, float b) { return a - b; }, "sub"); // 1

View File

@ -42,7 +42,7 @@ static constexpr long SEED = 41912;
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}); // NOLINT
blt::gp::gp_program program(type_system, SEED); // NOLINT
blt::gp::operation_t add([](float a, float b) { return a + b; }, "add"); // 0
blt::gp::operation_t sub([](float a, float b) { return a - b; }, "sub"); // 1

View File

@ -24,7 +24,7 @@ static constexpr long SEED = 41912;
blt::gp::prog_config_t config = blt::gp::prog_config_t().set_elite_count(2);
blt::gp::type_provider type_system;
blt::gp::gp_program program(type_system, blt::gp::random_t{SEED}, config); // NOLINT
blt::gp::gp_program program(type_system, SEED, config); // NOLINT
std::array<float, 500> result_container;
blt::gp::operation_t add([](float a, float b) { return a + b; }, "add"); // 0
@ -59,7 +59,7 @@ void print_best()
auto& tree = v.tree;
auto size = tree.get_values().size();
BLT_TRACE("%lf [index %ld] (fitness: %lf, raw: %lf) (depth: %ld) (blocks: %ld) (size: t: %ld m: %ld u: %ld r: %ld) filled: %f%%",
tree.get_evaluation_value<float>(nullptr), i, v.standardized_fitness, v.raw_fitness,
tree.get_evaluation_value<float>(nullptr), i, v.fitness.adjusted_fitness, v.fitness.raw_fitness,
tree.get_depth(program), size.blocks, size.total_size_bytes, size.total_no_meta_bytes, size.total_used_bytes,
size.total_remaining_bytes,
static_cast<double>(size.total_used_bytes) / static_cast<double>(size.total_no_meta_bytes));

View File

@ -20,6 +20,7 @@
#define BLT_GP_CONFIG_H
#include <utility>
#include <thread>
#include <blt/std/types.h>
#include <blt/gp/generators.h>
#include <blt/gp/transformers.h>
@ -47,6 +48,10 @@ namespace blt::gp
std::reference_wrapper<crossover_t> crossover;
std::reference_wrapper<population_initializer_t> pop_initializer;
blt::size_t threads = std::thread::hardware_concurrency() - 1;
// number of individuals each thread claims per batch during fitness evaluation. This controls work granularity and can be tuned for performance.
blt::size_t evaluation_size = 4;
// default config (ramped half-and-half init); also the starting point for builder-style configuration
prog_config_t();
@ -60,6 +65,7 @@ namespace blt::gp
/**
 * Sets the number of individuals the population should contain.
 *
 * @param pop new population size
 * @return this configuration, so that setters can be chained
 */
prog_config_t& set_pop_size(blt::size_t pop)
{
    this->population_size = pop;
    // the automatic derivation of evaluation_size from the population size is currently disabled:
    //evaluation_size = (population_size / threads) / 2;
    return *this;
}
@ -122,6 +128,19 @@ namespace blt::gp
try_mutation_on_crossover_failure = new_try_mutation_on_crossover_failure;
return *this;
}
/**
 * Sets how many worker threads the program should create.
 *
 * A value of 0 is replaced by gp_program::create_threads() with
 * std::thread::hardware_concurrency() - 1.
 *
 * @param t requested number of worker threads
 * @return this configuration, so that setters can be chained
 */
prog_config_t& set_thread_count(blt::size_t t)
{
    this->threads = t;
    // the automatic derivation of evaluation_size from the thread count is currently disabled:
    //evaluation_size = (population_size / threads) / 2;
    return *this;
}
/**
 * Sets how many individuals a thread claims per batch during fitness evaluation.
 *
 * @param s requested batch size; 0 is raised to 1
 * @return this configuration, so that setters can be chained
 */
prog_config_t& set_evaluation_size(blt::size_t s)
{
    // A batch size of 0 would make gp_program::execute_thread() claim zero individuals per
    // iteration, so its work counter would never reach zero and the loop would never end.
    evaluation_size = s == 0 ? 1 : s;
    return *this;
}
};
}

View File

@ -30,6 +30,9 @@
#include <algorithm>
#include <memory>
#include <array>
#include <thread>
#include <mutex>
#include <atomic>
#include <blt/std/ranges.h>
#include <blt/std/hashmap.h>
@ -235,16 +238,16 @@ namespace blt::gp
* call to one of the evaluator functions. This was the nicest way to provide this as C++ lacks reflection
*
* @param system type system to use in tree generation
* @param engine random engine to use throughout the program. TODO replace this with something better
* @param engine random engine to use throughout the program.
* @param context_size number of arguments which are always present as "context" to the GP system / operators
*/
explicit gp_program(type_provider& system, random_t engine):
system(system), engine(engine)
{}
/**
 * Constructs a program with a default-constructed configuration and immediately
 * launches the worker threads through create_threads().
 *
 * @param system type system to use in tree generation
 * @param seed value stored in the program and handed to the random engine built by get_random()
 */
explicit gp_program(type_provider& system, blt::u64 seed):
system(system), seed(seed)
// worker threads are started from the constructor body, after all members are initialized
{ create_threads(); }
explicit gp_program(type_provider& system, random_t engine, prog_config_t config):
system(system), engine(engine), config(config)
{}
/**
 * Constructs a program with a caller-supplied configuration and immediately
 * launches the worker threads through create_threads().
 *
 * @param system type system to use in tree generation
 * @param seed value stored in the program and handed to the random engine built by get_random()
 * @param config configuration copied into the program; create_threads() replaces a thread count of 0
 */
explicit gp_program(type_provider& system, blt::u64 seed, prog_config_t config):
system(system), seed(seed), config(config)
// worker threads are started from the constructor body, after all members are initialized
{ create_threads(); }
template<typename Crossover, typename Mutation, typename Reproduction, typename CreationFunc = decltype(default_next_pop_creator<Crossover, Mutation, Reproduction>)>
void create_next_generation(Crossover&& crossover_selection, Mutation&& mutation_selection, Reproduction&& reproduction_selection,
@ -262,7 +265,7 @@ namespace blt::gp
void evaluate_fitness()
{
evaluate_fitness_func();
evaluate_fitness_internal();
}
/**
@ -280,10 +283,10 @@ namespace blt::gp
{
current_pop = config.pop_initializer.get().generate(
{*this, root_type, config.population_size, config.initial_min_tree_size, config.initial_max_tree_size});
evaluate_fitness_func = [this, &fitness_function]() {
evaluate_fitness_internal(fitness_function);
evaluate_fitness_func = [&fitness_function](tree_t& current_tree, fitness_t& fitness, blt::size_t index) {
fitness_function(current_tree, fitness, index);
};
evaluate_fitness_func();
evaluate_fitness_internal();
}
void next_generation()
@ -343,11 +346,13 @@ namespace blt::gp
return current_generation >= config.max_generations;
}
[[nodiscard]] inline random_t& get_random()
[[nodiscard]] bool should_thread_terminate() const
{
return engine;
return should_terminate() && thread_helper.lifetime_over;
}
[[nodiscard]] random_t& get_random() const;
[[nodiscard]] inline type_provider& get_typesystem()
{
return system;
@ -358,17 +363,17 @@ namespace blt::gp
// we wanted a terminal, but could not find one, so we will select from a function that has a terminal
if (storage.terminals[id].empty())
return select_non_terminal_too_deep(id);
return storage.terminals[id][engine.get_size_t(0, storage.terminals[id].size())];
return get_random().select(storage.terminals[id]);
}
inline operator_id select_non_terminal(type_id id)
{
return storage.non_terminals[id][engine.get_size_t(0, storage.non_terminals[id].size())];
return get_random().select(storage.non_terminals[id]);
}
inline operator_id select_non_terminal_too_deep(type_id id)
{
return storage.operators_ordered_terminals[id][engine.get_size_t(0, storage.operators_ordered_terminals[id].size())].first;
return get_random().select(storage.operators_ordered_terminals[id]).first;
}
inline operator_info& get_operator_info(operator_id id)
@ -408,29 +413,52 @@ namespace blt::gp
[[nodiscard]] inline auto get_current_generation() const
{
return current_generation;
return current_generation.load();
}
/**
 * Gives access to the statistics of the current population.
 *
 * The fitness fields are atomics that execute_thread() updates while fitness is
 * being evaluated.
 *
 * @return reference to the program's statistics object
 */
[[nodiscard]] inline auto& get_population_stats()
{
    auto& stats = current_stats;
    return stats;
}
/**
 * Flags the end of the program's lifetime and waits for every worker thread to finish.
 *
 * NOTE(review): the workers poll should_thread_terminate(), which also requires
 * should_terminate() to be true; if the generation limit has not been reached the
 * joins below look like they will block indefinitely - confirm.
 */
~gp_program()
{
    thread_helper.lifetime_over.store(true);
    for (auto& worker : thread_helper.threads)
    {
        if (!worker->joinable())
            continue;
        worker->join();
    }
}
private:
type_provider& system;
blt::gp::stack_allocator alloc;
operator_storage storage;
population_t current_pop;
population_stats current_stats;
population_t next_pop;
blt::size_t current_generation = 0;
std::atomic_uint64_t current_generation = 0;
random_t engine;
blt::u64 seed;
prog_config_t config;
// State shared between the owning thread and the worker threads during fitness evaluation.
struct concurrency_storage
{
// worker threads, created by create_threads() and joined in the destructor
std::vector<std::unique_ptr<std::thread>> threads;
// held while evaluation_left is reset (evaluate_fitness_internal) and while a batch is claimed (execute_thread)
std::mutex evaluation_control;
// number of individuals not yet claimed for evaluation; batches are taken from the top of the range
std::atomic_uint64_t evaluation_left = 0;
// set to config.threads + 1 before an evaluation; decremented by each caller of execute_thread() once it runs out of work
std::atomic_uint64_t threads_left = 0;
// set to true by the destructor; read by should_thread_terminate()
std::atomic_bool lifetime_over = false;
} thread_helper;
// for convenience, shouldn't decrease performance too much
std::function<void()> evaluate_fitness_func;
std::function<void(tree_t&, fitness_t&, blt::size_t)> evaluate_fitness_func;
inline selector_args get_selector_args()
{
return {*this, next_pop, current_pop, current_stats, config, engine};
return {*this, next_pop, current_pop, current_stats, config, get_random()};
}
template<typename Return, blt::size_t size, typename Accessor, blt::size_t... indexes>
@ -440,10 +468,46 @@ namespace blt::gp
return Return{accessor(arr, indexes)...};
}
template<typename Callable>
void evaluate_fitness_internal(Callable&& fitness_function)
void create_threads();
void execute_thread();
void evaluate_fitness_internal()
{
current_stats = {};
current_stats.clear();
{
std::scoped_lock lock(thread_helper.evaluation_control);
thread_helper.evaluation_left = current_pop.get_individuals().size();
thread_helper.threads_left = config.threads + 1;
}
while (thread_helper.threads_left > 0)
execute_thread();
// for (auto& ind : current_pop.get_individuals())
// {
// if (ind.fitness.adjusted_fitness > current_stats.best_fitness)
// {
// current_stats.best_fitness = ind.fitness.adjusted_fitness;
// }
//
// if (ind.fitness.adjusted_fitness < current_stats.worst_fitness)
// {
// current_stats.worst_fitness = ind.fitness.adjusted_fitness;
// }
//
// current_stats.overall_fitness = current_stats.overall_fitness + ind.fitness.adjusted_fitness;
// }
current_stats.average_fitness = current_stats.overall_fitness / static_cast<double>(config.population_size);
//
// BLT_INFO("Stats:");
// BLT_INFO("Average fitness: %lf", current_stats.average_fitness.load());
// BLT_INFO("Best fitness: %lf", current_stats.best_fitness.load());
// BLT_INFO("Worst fitness: %lf", current_stats.worst_fitness.load());
// BLT_INFO("Overall fitness: %lf", current_stats.overall_fitness.load());
/*current_stats = {};
for (const auto& ind : blt::enumerate(current_pop.get_individuals()))
{
fitness_function(ind.second.tree, ind.second.fitness, ind.first);
@ -461,51 +525,7 @@ namespace blt::gp
current_stats.overall_fitness += ind.second.fitness.adjusted_fitness;
}
current_stats.average_fitness /= static_cast<double>(config.population_size);
// double min = 0;
// double max = 0;
// for (auto& ind : current_pop.get_individuals())
// {
// if (ind.raw_fitness < min)
// min = ind.raw_fitness;
// if (ind.raw_fitness > max)
// max = ind.raw_fitness;
// }
//
// double overall_fitness = 0;
// double best_fitness = 2;
// double worst_fitness = 0;
// individual* best = nullptr;
// individual* worst = nullptr;
//
// auto diff = -min;
// for (auto& ind : current_pop.get_individuals())
// {
// // make standardized fitness [0, +inf)
// ind.standardized_fitness = ind.raw_fitness + diff;
// //BLT_WARN(ind.standardized_fitness);
// if (larger_better)
// ind.standardized_fitness = (max + diff) - ind.standardized_fitness;
// //BLT_WARN(ind.standardized_fitness);
// //ind.adjusted_fitness = (1.0 / (1.0 + ind.standardized_fitness));
//
// if (ind.standardized_fitness > worst_fitness)
// {
// worst_fitness = ind.standardized_fitness;
// worst = &ind;
// }
//
// if (ind.standardized_fitness < best_fitness)
// {
// best_fitness = ind.standardized_fitness;
// best = &ind;
// }
//
// overall_fitness += ind.standardized_fitness / static_cast<double>(config.population_size);
// }
//
// current_stats = {overall_fitness, overall_fitness, best_fitness, worst_fitness, best,
// worst};
current_stats.average_fitness = current_stats.overall_fitness / static_cast<double>(config.population_size);*/
}
};

View File

@ -27,6 +27,7 @@
#include <utility>
#include <stack>
#include <ostream>
#include <atomic>
namespace blt::gp
{
@ -143,14 +144,20 @@ namespace blt::gp
struct population_stats
{
double overall_fitness = 0;
double average_fitness = 0;
double best_fitness = 0;
double worst_fitness = 1;
// these will never be null unless your pop is not initialized / fitness eval was not called!
individual* best_individual = nullptr;
individual* worst_individual = nullptr;
std::atomic<double> overall_fitness = 0;
std::atomic<double> average_fitness = 0;
std::atomic<double> best_fitness = 0;
std::atomic<double> worst_fitness = 1;
std::vector<double> normalized_fitness;
/**
 * Resets every statistic to the value it has on a freshly constructed object so
 * a new round of fitness evaluation can accumulate into it.
 */
void clear()
{
    overall_fitness = 0;
    average_fitness = 0;
    best_fitness = 0;
    // Must match the default member initializer (1), not 0: the evaluation only lowers
    // worst_fitness when an individual's adjusted fitness is below the stored value, which
    // can never happen when starting from 0 (assumes adjusted fitness is non-negative - TODO confirm).
    worst_fitness = 1;
    normalized_fitness.clear();
}
};
class population_t

@ -1 +1 @@
Subproject commit 456eeb12ac416a4ac4b5e72213f5a93fa576607c
Subproject commit c5f3d9ba3b805d16c44cca020eeeec8abcee443f

View File

@ -16,10 +16,10 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <blt/gp/program.h>
#include <iostream>
#include <memory>
#include <thread>
namespace blt::gp
{
// default static references for mutation, crossover, and initializer
// this is largely to not break the tests :3
// it's also to allow for quick setup of a gp program if you don't care how crossover or mutation is handled
@ -43,4 +43,68 @@ namespace blt::gp
/**
 * Builds a configuration with the given population size; the mutator, crossover and
 * population initializer are bound to the default static instances declared in this file.
 *
 * @param populationSize number of individuals in the population
 */
prog_config_t::prog_config_t(size_t populationSize):
population_size(populationSize), mutator(s_mutator), crossover(s_crossover), pop_initializer(s_init)
{}
/**
 * Returns the random engine belonging to the calling thread.
 *
 * The engine is a function-local thread_local object, so it is constructed from this
 * program's seed the first time a given thread calls this function.
 *
 * NOTE(review): every thread constructs its engine from the same seed, and because the
 * object is function-local rather than per-instance, a second gp_program used on the same
 * thread would reuse the engine built by the first - confirm this is intended.
 *
 * @return reference to the calling thread's random engine
 */
random_t& gp_program::get_random() const
{
    static thread_local blt::gp::random_t per_thread_engine{seed};
    return per_thread_engine;
}
void gp_program::create_threads()
{
if (config.threads == 0)
config.set_thread_count(std::thread::hardware_concurrency() - 1);
for (blt::size_t i = 0; i < config.threads; i++)
{
thread_helper.threads.emplace_back(new std::thread([this]() {
while (!should_thread_terminate())
{
execute_thread();
}
std::cout << "Ending Thread!" << std::endl;
}));
}
}
void gp_program::execute_thread()
{
if (thread_helper.evaluation_left > 0)
{
while (thread_helper.evaluation_left > 0)
{
blt::size_t begin = 0;
blt::size_t end = 0;
{
std::scoped_lock lock(thread_helper.evaluation_control);
end = thread_helper.evaluation_left;
auto size = std::min(thread_helper.evaluation_left.load(), config.evaluation_size);
begin = thread_helper.evaluation_left - size;
thread_helper.evaluation_left -= size;
}
//std::cout << "Processing " << begin << " to " << end << " with " << thread_helper.evaluation_left << " left" << std::endl;
for (blt::size_t i = begin; i < end; i++)
{
auto& ind = current_pop.get_individuals()[i];
evaluate_fitness_func(ind.tree, ind.fitness, i);
auto old_best = current_stats.best_fitness.load();
while (ind.fitness.adjusted_fitness > old_best &&
!current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness,
std::memory_order_release,
std::memory_order_relaxed));
auto old_worst = current_stats.worst_fitness.load();
while (ind.fitness.adjusted_fitness < old_worst &&
!current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness,
std::memory_order_release, std::memory_order_relaxed));
auto old_overall = current_stats.overall_fitness.load();
while (!current_stats.overall_fitness.compare_exchange_weak(old_overall, ind.fitness.adjusted_fitness + old_overall,
std::memory_order_release, std::memory_order_relaxed));
}
}
thread_helper.threads_left--;
}
}
}