Compare commits
10 Commits
b041d1ffba
...
76de033fe8
Author | SHA1 | Date |
---|---|---|
Brett | 76de033fe8 | |
Brett | 858a7f5cfe | |
Brett | 7c3b8c050b | |
Brett | a8b81bc7a6 | |
Brett | 58b3ed02c3 | |
Brett | 18ef85c1ce | |
Brett | 1b83d6b4a8 | |
Brett | 3dd3e6fc9e | |
Brett | 96f9ded1c5 | |
Brett | ac3bc8d10b |
|
@ -1,5 +1,5 @@
|
||||||
cmake_minimum_required(VERSION 3.25)
|
cmake_minimum_required(VERSION 3.25)
|
||||||
project(blt-gp VERSION 0.0.145)
|
project(blt-gp VERSION 0.1.9)
|
||||||
|
|
||||||
include(CTest)
|
include(CTest)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
Performance counter stats for './cmake-build-release/blt-symbolic-regression-example' (30 runs):
|
||||||
|
|
||||||
|
24,277,728,279 branches ( +- 19.01% ) (20.47%)
|
||||||
|
76,457,616 branch-misses # 0.31% of all branches ( +- 17.97% ) (21.41%)
|
||||||
|
14,213,192 cache-misses # 4.73% of all cache refs ( +- 14.24% ) (22.52%)
|
||||||
|
300,581,049 cache-references ( +- 21.08% ) (23.68%)
|
||||||
|
48,914,779,668 cycles ( +- 19.65% ) (24.80%)
|
||||||
|
123,068,193,359 instructions # 2.52 insn per cycle ( +- 19.44% ) (25.09%)
|
||||||
|
0 alignment-faults
|
||||||
|
4,202 cgroup-switches ( +- 13.56% )
|
||||||
|
115,962 faults ( +- 10.95% )
|
||||||
|
871,101,993 ns duration_time ( +- 13.40% )
|
||||||
|
11,507,605,674 ns user_time ( +- 3.56% )
|
||||||
|
299,016,204 ns system_time ( +- 3.32% )
|
||||||
|
41,446,831,795 L1-dcache-loads ( +- 19.28% ) (24.69%)
|
||||||
|
167,603,194 L1-dcache-load-misses # 0.40% of all L1-dcache accesses ( +- 22.47% ) (23.95%)
|
||||||
|
81,992,073 L1-dcache-prefetches ( +- 25.34% ) (23.24%)
|
||||||
|
350,398,072 L1-icache-loads ( +- 15.30% ) (22.70%)
|
||||||
|
909,504 L1-icache-load-misses # 0.26% of all L1-icache accesses ( +- 14.46% ) (22.18%)
|
||||||
|
14,271,381 dTLB-loads ( +- 20.04% ) (21.90%)
|
||||||
|
1,559,972 dTLB-load-misses # 10.93% of all dTLB cache accesses ( +- 14.74% ) (21.39%)
|
||||||
|
246,888 iTLB-loads ( +- 21.69% ) (20.54%)
|
||||||
|
403,152 iTLB-load-misses # 163.29% of all iTLB cache accesses ( +- 13.35% ) (19.94%)
|
||||||
|
210,585,840 l2_request_g1.all_no_prefetch ( +- 20.07% ) (19.93%)
|
||||||
|
115,962 page-faults ( +- 10.95% )
|
||||||
|
115,958 page-faults:u ( +- 10.95% )
|
||||||
|
3 page-faults:k ( +- 4.54% )
|
||||||
|
41,209,739,257 L1-dcache-loads ( +- 19.02% ) (19.60%)
|
||||||
|
181,755,898 L1-dcache-load-misses # 0.44% of all L1-dcache accesses ( +- 20.60% ) (20.01%)
|
||||||
|
<not supported> LLC-loads
|
||||||
|
<not supported> LLC-load-misses
|
||||||
|
425,056,352 L1-icache-loads ( +- 12.27% ) (20.43%)
|
||||||
|
1,076,486 L1-icache-load-misses # 0.31% of all L1-icache accesses ( +- 10.84% ) (20.98%)
|
||||||
|
15,418,419 dTLB-loads ( +- 17.74% ) (21.24%)
|
||||||
|
1,648,473 dTLB-load-misses # 11.55% of all dTLB cache accesses ( +- 13.11% ) (20.94%)
|
||||||
|
325,141 iTLB-loads ( +- 26.87% ) (20.80%)
|
||||||
|
459,828 iTLB-load-misses # 186.25% of all iTLB cache accesses ( +- 11.50% ) (20.34%)
|
||||||
|
94,270,593 L1-dcache-prefetches ( +- 22.82% ) (20.09%)
|
||||||
|
<not supported> L1-dcache-prefetch-misses
|
||||||
|
|
||||||
|
0.871 +- 0.117 seconds time elapsed ( +- 13.40% )
|
|
@ -31,6 +31,8 @@ struct context
|
||||||
|
|
||||||
std::array<context, 200> fitness_cases;
|
std::array<context, 200> fitness_cases;
|
||||||
|
|
||||||
|
blt::gp::mutation_t mut;
|
||||||
|
|
||||||
blt::gp::prog_config_t config = blt::gp::prog_config_t()
|
blt::gp::prog_config_t config = blt::gp::prog_config_t()
|
||||||
.set_initial_min_tree_size(2)
|
.set_initial_min_tree_size(2)
|
||||||
.set_initial_max_tree_size(6)
|
.set_initial_max_tree_size(6)
|
||||||
|
@ -117,15 +119,15 @@ int main()
|
||||||
program.set_operations(builder.build());
|
program.set_operations(builder.build());
|
||||||
|
|
||||||
BLT_DEBUG("Generate Initial Population");
|
BLT_DEBUG("Generate Initial Population");
|
||||||
program.generate_population(type_system.get_type<float>().id(), fitness_function);
|
auto sel = blt::gp::select_fitness_proportionate_t{};
|
||||||
|
program.generate_population(type_system.get_type<float>().id(), fitness_function, sel, sel, sel);
|
||||||
|
|
||||||
BLT_DEBUG("Begin Generation Loop");
|
BLT_DEBUG("Begin Generation Loop");
|
||||||
while (!program.should_terminate())
|
while (!program.should_terminate())
|
||||||
{
|
{
|
||||||
BLT_TRACE("------------{Begin Generation %ld}------------", program.get_current_generation());
|
BLT_TRACE("------------{Begin Generation %ld}------------", program.get_current_generation());
|
||||||
BLT_START_INTERVAL("Symbolic Regression", "Gen");
|
BLT_START_INTERVAL("Symbolic Regression", "Gen");
|
||||||
auto sel = blt::gp::select_fitness_proportionate_t{};
|
program.create_next_generation();
|
||||||
program.create_next_generation(sel, sel, sel);
|
|
||||||
BLT_END_INTERVAL("Symbolic Regression", "Gen");
|
BLT_END_INTERVAL("Symbolic Regression", "Gen");
|
||||||
BLT_TRACE("Move to next generation");
|
BLT_TRACE("Move to next generation");
|
||||||
BLT_START_INTERVAL("Symbolic Regression", "Fitness");
|
BLT_START_INTERVAL("Symbolic Regression", "Fitness");
|
||||||
|
|
|
@ -264,19 +264,12 @@ namespace blt::gp
|
||||||
system(system), seed(seed), config(config)
|
system(system), seed(seed), config(config)
|
||||||
{ create_threads(); }
|
{ create_threads(); }
|
||||||
|
|
||||||
template<typename Crossover, typename Mutation, typename Reproduction, typename CreationFunc = decltype(default_next_pop_creator<Crossover, Mutation, Reproduction>)>
|
void create_next_generation()
|
||||||
void create_next_generation(Crossover&& crossover_selection, Mutation&& mutation_selection, Reproduction&& reproduction_selection,
|
|
||||||
CreationFunc& func = default_next_pop_creator<Crossover, Mutation, Reproduction>)
|
|
||||||
{
|
{
|
||||||
// should already be empty
|
// should already be empty
|
||||||
next_pop.clear();
|
next_pop.clear();
|
||||||
crossover_selection.pre_process(*this, current_pop, current_stats);
|
thread_helper.next_gen_left.store(config.population_size, std::memory_order_release);
|
||||||
mutation_selection.pre_process(*this, current_pop, current_stats);
|
(*thread_execution_service)(0);
|
||||||
reproduction_selection.pre_process(*this, current_pop, current_stats);
|
|
||||||
|
|
||||||
auto args = get_selector_args();
|
|
||||||
func(args, std::forward<Crossover>(crossover_selection), std::forward<Mutation>(mutation_selection),
|
|
||||||
std::forward<Reproduction>(reproduction_selection));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void evaluate_fitness()
|
void evaluate_fitness()
|
||||||
|
@ -294,8 +287,10 @@ namespace blt::gp
|
||||||
*
|
*
|
||||||
* NOTE: 0 is considered the best, in terms of standardized fitness
|
* NOTE: 0 is considered the best, in terms of standardized fitness
|
||||||
*/
|
*/
|
||||||
template<typename FitnessFunc>
|
template<typename FitnessFunc, typename Crossover, typename Mutation, typename Reproduction, typename CreationFunc = decltype(default_next_pop_creator<Crossover, Mutation, Reproduction>)>
|
||||||
void generate_population(type_id root_type, FitnessFunc& fitness_function, bool eval_fitness_now = true)
|
void generate_population(type_id root_type, FitnessFunc& fitness_function,
|
||||||
|
Crossover& crossover_selection, Mutation& mutation_selection, Reproduction& reproduction_selection,
|
||||||
|
CreationFunc& func = default_next_pop_creator<Crossover, Mutation, Reproduction>, bool eval_fitness_now = true)
|
||||||
{
|
{
|
||||||
using LambdaReturn = typename decltype(blt::meta::lambda_helper(fitness_function))::Return;
|
using LambdaReturn = typename decltype(blt::meta::lambda_helper(fitness_function))::Return;
|
||||||
current_pop = config.pop_initializer.get().generate(
|
current_pop = config.pop_initializer.get().generate(
|
||||||
|
@ -303,7 +298,10 @@ namespace blt::gp
|
||||||
if (config.threads == 1)
|
if (config.threads == 1)
|
||||||
{
|
{
|
||||||
BLT_INFO("Starting with single thread variant!");
|
BLT_INFO("Starting with single thread variant!");
|
||||||
thread_execution_service = new std::function([this, &fitness_function](blt::size_t) {
|
thread_execution_service = new std::function(
|
||||||
|
[this, &fitness_function, &crossover_selection, &mutation_selection, &reproduction_selection, &func](blt::size_t) {
|
||||||
|
if (thread_helper.evaluation_left > 0)
|
||||||
|
{
|
||||||
for (const auto& ind : blt::enumerate(current_pop.get_individuals()))
|
for (const auto& ind : blt::enumerate(current_pop.get_individuals()))
|
||||||
{
|
{
|
||||||
if constexpr (std::is_same_v<LambdaReturn, bool> || std::is_convertible_v<LambdaReturn, bool>)
|
if constexpr (std::is_same_v<LambdaReturn, bool> || std::is_convertible_v<LambdaReturn, bool>)
|
||||||
|
@ -324,12 +322,35 @@ namespace blt::gp
|
||||||
|
|
||||||
current_stats.overall_fitness = current_stats.overall_fitness + ind.second.fitness.adjusted_fitness;
|
current_stats.overall_fitness = current_stats.overall_fitness + ind.second.fitness.adjusted_fitness;
|
||||||
}
|
}
|
||||||
|
thread_helper.evaluation_left = 0;
|
||||||
|
}
|
||||||
|
if (thread_helper.next_gen_left > 0)
|
||||||
|
{
|
||||||
|
static thread_local std::vector<tree_t> new_children;
|
||||||
|
new_children.clear();
|
||||||
|
auto args = get_selector_args(new_children);
|
||||||
|
|
||||||
|
crossover_selection.pre_process(*this, current_pop, current_stats);
|
||||||
|
mutation_selection.pre_process(*this, current_pop, current_stats);
|
||||||
|
reproduction_selection.pre_process(*this, current_pop, current_stats);
|
||||||
|
|
||||||
|
perform_elitism(args);
|
||||||
|
|
||||||
|
while (new_children.size() < config.population_size)
|
||||||
|
func(args, crossover_selection, mutation_selection, reproduction_selection);
|
||||||
|
|
||||||
|
for (auto& i : new_children)
|
||||||
|
next_pop.get_individuals().emplace_back(std::move(i));
|
||||||
|
|
||||||
|
thread_helper.next_gen_left = 0;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
BLT_INFO("Starting thread execution service!");
|
BLT_INFO("Starting thread execution service!");
|
||||||
std::scoped_lock lock(thread_helper.thread_function_control);
|
std::scoped_lock lock(thread_helper.thread_function_control);
|
||||||
thread_execution_service = new std::function([this, &fitness_function](blt::size_t) {
|
thread_execution_service = new std::function(
|
||||||
|
[this, &fitness_function, &crossover_selection, &mutation_selection, &reproduction_selection, &func](blt::size_t id) {
|
||||||
thread_helper.barrier.wait();
|
thread_helper.barrier.wait();
|
||||||
if (thread_helper.evaluation_left > 0)
|
if (thread_helper.evaluation_left > 0)
|
||||||
{
|
{
|
||||||
|
@ -363,12 +384,14 @@ namespace blt::gp
|
||||||
auto old_best = current_stats.best_fitness.load(std::memory_order_relaxed);
|
auto old_best = current_stats.best_fitness.load(std::memory_order_relaxed);
|
||||||
while (ind.fitness.adjusted_fitness > old_best &&
|
while (ind.fitness.adjusted_fitness > old_best &&
|
||||||
!current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness,
|
!current_stats.best_fitness.compare_exchange_weak(old_best, ind.fitness.adjusted_fitness,
|
||||||
std::memory_order_relaxed, std::memory_order_relaxed));
|
std::memory_order_relaxed,
|
||||||
|
std::memory_order_relaxed));
|
||||||
|
|
||||||
auto old_worst = current_stats.worst_fitness.load(std::memory_order_relaxed);
|
auto old_worst = current_stats.worst_fitness.load(std::memory_order_relaxed);
|
||||||
while (ind.fitness.adjusted_fitness < old_worst &&
|
while (ind.fitness.adjusted_fitness < old_worst &&
|
||||||
!current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness,
|
!current_stats.worst_fitness.compare_exchange_weak(old_worst, ind.fitness.adjusted_fitness,
|
||||||
std::memory_order_relaxed, std::memory_order_relaxed));
|
std::memory_order_relaxed,
|
||||||
|
std::memory_order_relaxed));
|
||||||
|
|
||||||
auto old_overall = current_stats.overall_fitness.load(std::memory_order_relaxed);
|
auto old_overall = current_stats.overall_fitness.load(std::memory_order_relaxed);
|
||||||
while (!current_stats.overall_fitness.compare_exchange_weak(old_overall,
|
while (!current_stats.overall_fitness.compare_exchange_weak(old_overall,
|
||||||
|
@ -378,6 +401,54 @@ namespace blt::gp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (thread_helper.next_gen_left > 0)
|
||||||
|
{
|
||||||
|
static thread_local std::vector<tree_t> new_children;
|
||||||
|
new_children.clear();
|
||||||
|
auto args = get_selector_args(new_children);
|
||||||
|
if (id == 0)
|
||||||
|
{
|
||||||
|
crossover_selection.pre_process(*this, current_pop, current_stats);
|
||||||
|
if (&crossover_selection != &mutation_selection)
|
||||||
|
mutation_selection.pre_process(*this, current_pop, current_stats);
|
||||||
|
if (&crossover_selection != &reproduction_selection)
|
||||||
|
reproduction_selection.pre_process(*this, current_pop, current_stats);
|
||||||
|
|
||||||
|
perform_elitism(args);
|
||||||
|
|
||||||
|
for (auto& i : new_children)
|
||||||
|
next_pop.get_individuals().emplace_back(std::move(i));
|
||||||
|
thread_helper.next_gen_left -= new_children.size();
|
||||||
|
new_children.clear();
|
||||||
|
}
|
||||||
|
thread_helper.barrier.wait();
|
||||||
|
|
||||||
|
while (thread_helper.next_gen_left > 0)
|
||||||
|
{
|
||||||
|
blt::size_t size = 0;
|
||||||
|
blt::size_t begin = 0;
|
||||||
|
blt::size_t end = thread_helper.next_gen_left.load(std::memory_order_relaxed);
|
||||||
|
do
|
||||||
|
{
|
||||||
|
size = std::min(end, config.evaluation_size);
|
||||||
|
begin = end - size;
|
||||||
|
} while (!thread_helper.next_gen_left.compare_exchange_weak(end, end - size,
|
||||||
|
std::memory_order::memory_order_relaxed,
|
||||||
|
std::memory_order::memory_order_relaxed));
|
||||||
|
|
||||||
|
for (blt::size_t i = begin; i < end; i++)
|
||||||
|
func(args, crossover_selection, mutation_selection, reproduction_selection);
|
||||||
|
|
||||||
|
{
|
||||||
|
std::scoped_lock lock(thread_helper.thread_generation_lock);
|
||||||
|
for (auto& i : new_children)
|
||||||
|
{
|
||||||
|
if (next_pop.get_individuals().size() < config.population_size)
|
||||||
|
next_pop.get_individuals().emplace_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
thread_helper.barrier.wait();
|
thread_helper.barrier.wait();
|
||||||
});
|
});
|
||||||
thread_helper.thread_function_condition.notify_all();
|
thread_helper.thread_function_condition.notify_all();
|
||||||
|
@ -581,9 +652,11 @@ namespace blt::gp
|
||||||
std::vector<std::unique_ptr<std::thread>> threads;
|
std::vector<std::unique_ptr<std::thread>> threads;
|
||||||
|
|
||||||
std::mutex thread_function_control;
|
std::mutex thread_function_control;
|
||||||
|
std::mutex thread_generation_lock;
|
||||||
std::condition_variable thread_function_condition{};
|
std::condition_variable thread_function_condition{};
|
||||||
|
|
||||||
std::atomic_uint64_t evaluation_left = 0;
|
std::atomic_uint64_t evaluation_left = 0;
|
||||||
|
std::atomic_uint64_t next_gen_left = 0;
|
||||||
|
|
||||||
std::atomic_bool lifetime_over = false;
|
std::atomic_bool lifetime_over = false;
|
||||||
blt::barrier barrier;
|
blt::barrier barrier;
|
||||||
|
@ -595,9 +668,9 @@ namespace blt::gp
|
||||||
// for convenience, shouldn't decrease performance too much
|
// for convenience, shouldn't decrease performance too much
|
||||||
std::atomic<std::function<void(blt::size_t)>*> thread_execution_service = nullptr;
|
std::atomic<std::function<void(blt::size_t)>*> thread_execution_service = nullptr;
|
||||||
|
|
||||||
inline selector_args get_selector_args()
|
inline selector_args get_selector_args(std::vector<tree_t>& next_pop_trees)
|
||||||
{
|
{
|
||||||
return {*this, next_pop, current_pop, current_stats, config, get_random()};
|
return {*this, next_pop_trees, current_pop, current_stats, config, get_random()};
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Return, blt::size_t size, typename Accessor, blt::size_t... indexes>
|
template<typename Return, blt::size_t size, typename Accessor, blt::size_t... indexes>
|
||||||
|
@ -612,7 +685,6 @@ namespace blt::gp
|
||||||
void evaluate_fitness_internal()
|
void evaluate_fitness_internal()
|
||||||
{
|
{
|
||||||
current_stats.clear();
|
current_stats.clear();
|
||||||
if (config.threads != 1)
|
|
||||||
thread_helper.evaluation_left.store(current_pop.get_individuals().size(), std::memory_order_release);
|
thread_helper.evaluation_left.store(current_pop.get_individuals().size(), std::memory_order_release);
|
||||||
(*thread_execution_service)(0);
|
(*thread_execution_service)(0);
|
||||||
|
|
||||||
|
|
|
@ -31,7 +31,7 @@ namespace blt::gp
|
||||||
struct selector_args
|
struct selector_args
|
||||||
{
|
{
|
||||||
gp_program& program;
|
gp_program& program;
|
||||||
population_t& next_pop;
|
std::vector<tree_t>& next_pop;
|
||||||
population_t& current_pop;
|
population_t& current_pop;
|
||||||
population_stats& current_stats;
|
population_stats& current_stats;
|
||||||
prog_config_t& config;
|
prog_config_t& config;
|
||||||
|
@ -52,8 +52,6 @@ namespace blt::gp
|
||||||
{
|
{
|
||||||
for (blt::size_t i = 0; i < config.elites; i++)
|
for (blt::size_t i = 0; i < config.elites; i++)
|
||||||
{
|
{
|
||||||
// BLT_INFO("%lf >= %lf? // %lf (indexes: %ld %ld)", ind.second.fitness.adjusted_fitness, values[i].second,
|
|
||||||
// ind.second.fitness.raw_fitness, ind.first, values[i].first);
|
|
||||||
if (ind.second.fitness.adjusted_fitness >= values[i].second)
|
if (ind.second.fitness.adjusted_fitness >= values[i].second)
|
||||||
{
|
{
|
||||||
bool doesnt_contain = true;
|
bool doesnt_contain = true;
|
||||||
|
@ -70,77 +68,15 @@ namespace blt::gp
|
||||||
}
|
}
|
||||||
|
|
||||||
for (blt::size_t i = 0; i < config.elites; i++)
|
for (blt::size_t i = 0; i < config.elites; i++)
|
||||||
next_pop.get_individuals().push_back(current_pop.get_individuals()[values[i].first]);
|
next_pop.push_back(current_pop.get_individuals()[values[i].first].tree);
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Crossover, typename Mutation, typename Reproduction>
|
|
||||||
constexpr inline auto proportionate_next_pop_creator = [](
|
|
||||||
const selector_args& args, Crossover crossover_selection, Mutation mutation_selection, Reproduction reproduction_selection) {
|
|
||||||
auto& [program, next_pop, current_pop, current_stats, config, random] = args;
|
|
||||||
|
|
||||||
double total_prob = config.mutation_chance + config.crossover_chance;
|
|
||||||
double crossover_chance = config.crossover_chance / total_prob;
|
|
||||||
double mutation_chance = crossover_chance + config.mutation_chance / total_prob;
|
|
||||||
|
|
||||||
perform_elitism(args);
|
|
||||||
|
|
||||||
while (next_pop.get_individuals().size() < config.population_size)
|
|
||||||
{
|
|
||||||
auto type = random.get_double();
|
|
||||||
if (type > crossover_chance && type < mutation_chance)
|
|
||||||
{
|
|
||||||
// crossover
|
|
||||||
auto& p1 = crossover_selection.select(program, current_pop, current_stats);
|
|
||||||
auto& p2 = crossover_selection.select(program, current_pop, current_stats);
|
|
||||||
|
|
||||||
auto results = config.crossover.get().apply(program, p1, p2);
|
|
||||||
|
|
||||||
// if crossover fails, we can check for mutation on these guys. otherwise straight copy them into the next pop
|
|
||||||
if (results)
|
|
||||||
{
|
|
||||||
next_pop.get_individuals().emplace_back(std::move(results->child1));
|
|
||||||
// annoying check
|
|
||||||
if (next_pop.get_individuals().size() < config.population_size)
|
|
||||||
next_pop.get_individuals().emplace_back(std::move(results->child2));
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
if (config.try_mutation_on_crossover_failure && random.choice(config.mutation_chance))
|
|
||||||
next_pop.get_individuals().emplace_back(std::move(config.mutator.get().apply(program, p1)));
|
|
||||||
else
|
|
||||||
next_pop.get_individuals().push_back(individual{p1});
|
|
||||||
// annoying check.
|
|
||||||
if (next_pop.get_individuals().size() < config.population_size)
|
|
||||||
{
|
|
||||||
if (config.try_mutation_on_crossover_failure && random.choice(config.mutation_chance))
|
|
||||||
next_pop.get_individuals().emplace_back(std::move(config.mutator.get().apply(program, p2)));
|
|
||||||
else
|
|
||||||
next_pop.get_individuals().push_back(individual{p2});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (type > mutation_chance)
|
|
||||||
{
|
|
||||||
// mutation
|
|
||||||
auto& p = mutation_selection.select(program, current_pop, current_stats);
|
|
||||||
next_pop.get_individuals().emplace_back(std::move(config.mutator.get().apply(program, p)));
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
// reproduction
|
|
||||||
auto& p = reproduction_selection.select(program, current_pop, current_stats);
|
|
||||||
next_pop.get_individuals().push_back(individual{p});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Crossover, typename Mutation, typename Reproduction>
|
template<typename Crossover, typename Mutation, typename Reproduction>
|
||||||
constexpr inline auto default_next_pop_creator = [](
|
constexpr inline auto default_next_pop_creator = [](
|
||||||
const blt::gp::selector_args& args, Crossover crossover_selection, Mutation mutation_selection, Reproduction reproduction_selection) {
|
blt::gp::selector_args& args, Crossover& crossover_selection, Mutation& mutation_selection, Reproduction& reproduction_selection) {
|
||||||
auto& [program, next_pop, current_pop, current_stats, config, random] = args;
|
auto& [program, next_pop, current_pop, current_stats, config, random] = args;
|
||||||
|
|
||||||
perform_elitism(args);
|
|
||||||
|
|
||||||
while (next_pop.get_individuals().size() < config.population_size)
|
|
||||||
{
|
|
||||||
int sel = random.get_i32(0, 3);
|
int sel = random.get_i32(0, 3);
|
||||||
switch (sel)
|
switch (sel)
|
||||||
{
|
{
|
||||||
|
@ -157,10 +93,8 @@ namespace blt::gp
|
||||||
// if crossover fails, we can check for mutation on these guys. otherwise straight copy them into the next pop
|
// if crossover fails, we can check for mutation on these guys. otherwise straight copy them into the next pop
|
||||||
if (results)
|
if (results)
|
||||||
{
|
{
|
||||||
next_pop.get_individuals().emplace_back(std::move(results->child1));
|
next_pop.push_back(std::move(results->child1));
|
||||||
// annoying check
|
next_pop.push_back(std::move(results->child2));
|
||||||
if (next_pop.get_individuals().size() < config.population_size)
|
|
||||||
next_pop.get_individuals().emplace_back(std::move(results->child2));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -169,7 +103,7 @@ namespace blt::gp
|
||||||
{
|
{
|
||||||
// mutation
|
// mutation
|
||||||
auto& p = mutation_selection.select(program, current_pop, current_stats);
|
auto& p = mutation_selection.select(program, current_pop, current_stats);
|
||||||
next_pop.get_individuals().emplace_back(std::move(config.mutator.get().apply(program, p)));
|
next_pop.push_back(std::move(config.mutator.get().apply(program, p)));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
|
@ -177,12 +111,15 @@ namespace blt::gp
|
||||||
{
|
{
|
||||||
// reproduction
|
// reproduction
|
||||||
auto& p = reproduction_selection.select(program, current_pop, current_stats);
|
auto& p = reproduction_selection.select(program, current_pop, current_stats);
|
||||||
next_pop.get_individuals().push_back(individual{p});
|
next_pop.push_back(p);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
#if BLT_DEBUG_LEVEL > 0
|
||||||
BLT_ABORT("This is not possible!");
|
BLT_ABORT("This is not possible!");
|
||||||
}
|
#else
|
||||||
|
BLT_UNREACHABLE;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include <blt/std/assert.h>
|
#include <blt/std/assert.h>
|
||||||
#include <blt/std/logging.h>
|
#include <blt/std/logging.h>
|
||||||
#include <blt/std/allocator.h>
|
#include <blt/std/allocator.h>
|
||||||
|
#include <blt/std/ranges.h>
|
||||||
#include <blt/std/meta.h>
|
#include <blt/std/meta.h>
|
||||||
#include <blt/gp/fwdecl.h>
|
#include <blt/gp/fwdecl.h>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
@ -53,426 +54,18 @@ namespace blt::gp
|
||||||
blt::size_t total_size_bytes = 0;
|
blt::size_t total_size_bytes = 0;
|
||||||
blt::size_t total_used_bytes = 0;
|
blt::size_t total_used_bytes = 0;
|
||||||
blt::size_t total_remaining_bytes = 0;
|
blt::size_t total_remaining_bytes = 0;
|
||||||
blt::size_t total_no_meta_bytes = 0;
|
|
||||||
|
|
||||||
blt::size_t total_dealloc = 0;
|
|
||||||
blt::size_t total_dealloc_used = 0;
|
|
||||||
blt::size_t total_dealloc_remaining = 0;
|
|
||||||
blt::size_t total_dealloc_no_meta = 0;
|
|
||||||
|
|
||||||
blt::size_t blocks = 0;
|
|
||||||
|
|
||||||
friend std::ostream& operator<<(std::ostream& stream, const size_data_t& data)
|
friend std::ostream& operator<<(std::ostream& stream, const size_data_t& data)
|
||||||
{
|
{
|
||||||
stream << "[";
|
stream << "[";
|
||||||
stream << data.total_used_bytes << "/";
|
stream << data.total_used_bytes << " / " << data.total_size_bytes;
|
||||||
stream << data.total_size_bytes << "(";
|
stream << " ("
|
||||||
stream << (static_cast<double>(data.total_used_bytes) / static_cast<double>(data.total_size_bytes) * 100) << "%), ";
|
<< (data.total_size_bytes != 0 ? (static_cast<double>(data.total_used_bytes) / static_cast<double>(data.total_size_bytes) *
|
||||||
stream << data.total_used_bytes << "/";
|
100) : 0) << "%); space left: " << data.total_remaining_bytes << "]";
|
||||||
stream << data.total_no_meta_bytes << "(";
|
|
||||||
stream << (static_cast<double>(data.total_used_bytes) / static_cast<double>(data.total_no_meta_bytes) * 100)
|
|
||||||
<< "%), (empty space: ";
|
|
||||||
stream << data.total_remaining_bytes << ") blocks: " << data.blocks << " || unallocated space: ";
|
|
||||||
stream << data.total_dealloc_used << "/";
|
|
||||||
stream << data.total_dealloc;
|
|
||||||
if (static_cast<double>(data.total_dealloc) > 0)
|
|
||||||
stream << "(" << (static_cast<double>(data.total_dealloc_used) / static_cast<double>(data.total_dealloc) * 100) << "%)";
|
|
||||||
stream << ", ";
|
|
||||||
stream << data.total_dealloc_used << "/";
|
|
||||||
stream << data.total_dealloc_no_meta;
|
|
||||||
if (data.total_dealloc_no_meta > 0)
|
|
||||||
stream << "(" << (static_cast<double>(data.total_dealloc_used) / static_cast<double>(data.total_dealloc_no_meta * 100))
|
|
||||||
<< "%)";
|
|
||||||
stream << ", (empty space: " << data.total_dealloc_remaining << ")]";
|
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void insert(stack_allocator stack)
|
|
||||||
{
|
|
||||||
if (stack.empty())
|
|
||||||
return;
|
|
||||||
// take a copy of the pointer to this stack's blocks
|
|
||||||
auto old_head = stack.head;
|
|
||||||
// stack is now empty, we have the last reference to it.
|
|
||||||
stack.head = nullptr;
|
|
||||||
// we don't have any nodes to search through or re-point, we can just assign the head
|
|
||||||
if (head == nullptr)
|
|
||||||
{
|
|
||||||
head = old_head;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// find the beginning of the stack
|
|
||||||
auto begin = old_head;
|
|
||||||
while (begin->metadata.prev != nullptr)
|
|
||||||
begin = begin->metadata.prev;
|
|
||||||
|
|
||||||
// move along blocks with free space, attempt to insert bytes from one stack to another
|
|
||||||
auto insert = head;
|
|
||||||
while (insert->metadata.next != nullptr && begin != nullptr)
|
|
||||||
{
|
|
||||||
if (begin->used_bytes_in_block() <= insert->remaining_bytes_in_block())
|
|
||||||
{
|
|
||||||
std::memcpy(insert->metadata.offset, begin->buffer, begin->used_bytes_in_block());
|
|
||||||
insert->metadata.offset += begin->used_bytes_in_block();
|
|
||||||
auto old_begin = begin;
|
|
||||||
begin = begin->metadata.next;
|
|
||||||
free_block(old_begin);
|
|
||||||
}
|
|
||||||
head = insert;
|
|
||||||
insert = insert->metadata.next;
|
|
||||||
}
|
|
||||||
if (begin == nullptr)
|
|
||||||
return;
|
|
||||||
while (insert->metadata.next != nullptr)
|
|
||||||
insert = insert->metadata.next;
|
|
||||||
// if here is space left we can move the pointers around
|
|
||||||
insert->metadata.next = begin;
|
|
||||||
begin->metadata.prev = insert;
|
|
||||||
// find where the head is now and set the head to this new point.
|
|
||||||
auto new_head = begin;
|
|
||||||
while (new_head->metadata.next != nullptr)
|
|
||||||
new_head = new_head->metadata.next;
|
|
||||||
head = new_head;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Bytes must be the number of bytes to move, all types must have alignment accounted for
|
|
||||||
*/
|
|
||||||
void copy_from(const stack_allocator& stack, blt::size_t bytes)
|
|
||||||
{
|
|
||||||
if (bytes == 0)
|
|
||||||
return;
|
|
||||||
if (stack.empty())
|
|
||||||
{
|
|
||||||
BLT_WARN("This stack is empty, we will copy no bytes from it!");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
auto [start_block, bytes_left, start_point] = get_start_from_bytes(stack, bytes);
|
|
||||||
|
|
||||||
if (bytes_left > 0)
|
|
||||||
{
|
|
||||||
allocate_block_to_head_for_size(bytes_left);
|
|
||||||
std::memcpy(head->metadata.offset, start_point, bytes_left);
|
|
||||||
head->metadata.offset += bytes_left;
|
|
||||||
start_block = start_block->metadata.next;
|
|
||||||
}
|
|
||||||
// we now copy whole blocks at a time.
|
|
||||||
while (start_block != nullptr)
|
|
||||||
{
|
|
||||||
allocate_block_to_head_for_size(start_block->used_bytes_in_block());
|
|
||||||
std::memcpy(head->metadata.offset, start_block->buffer, start_block->used_bytes_in_block());
|
|
||||||
head->metadata.offset += start_block->used_bytes_in_block();
|
|
||||||
start_block = start_block->metadata.next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_from(blt::u8* data, blt::size_t bytes)
|
|
||||||
{
|
|
||||||
if (bytes == 0 || data == nullptr)
|
|
||||||
return;
|
|
||||||
allocate_block_to_head_for_size(bytes);
|
|
||||||
std::memcpy(head->metadata.offset, data, bytes);
|
|
||||||
head->metadata.offset += bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
void copy_to(blt::u8* data, blt::size_t bytes) const
|
|
||||||
{
|
|
||||||
if (bytes == 0 || data == nullptr)
|
|
||||||
return;
|
|
||||||
auto [start_block, bytes_left, start_point] = get_start_from_bytes(*this, bytes);
|
|
||||||
|
|
||||||
blt::size_t write_point = 0;
|
|
||||||
if (bytes_left > 0)
|
|
||||||
{
|
|
||||||
std::memcpy(data + write_point, start_point, bytes_left);
|
|
||||||
write_point += bytes_left;
|
|
||||||
start_block = start_block->metadata.next;
|
|
||||||
}
|
|
||||||
// we now copy whole blocks at a time.
|
|
||||||
while (start_block != nullptr)
|
|
||||||
{
|
|
||||||
std::memcpy(data + write_point, start_block->buffer, start_block->used_bytes_in_block());
|
|
||||||
write_point += start_block->used_bytes_in_block();
|
|
||||||
start_block = start_block->metadata.next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Pushes an instance of an object on to the stack
|
|
||||||
* @tparam T type to push
|
|
||||||
* @param value universal reference to the object to push
|
|
||||||
*/
|
|
||||||
template<typename T>
|
|
||||||
void push(const T& value)
|
|
||||||
{
|
|
||||||
using NO_REF_T = std::remove_cv_t<std::remove_reference_t<T>>;
|
|
||||||
static_assert(std::is_trivially_copyable_v<NO_REF_T> && "Type must be bitwise copyable!");
|
|
||||||
static_assert(alignof(NO_REF_T) <= MAX_ALIGNMENT && "Type must not be greater than the max alignment!");
|
|
||||||
auto ptr = allocate_bytes<NO_REF_T>();
|
|
||||||
head->metadata.offset = static_cast<blt::u8*>(ptr) + aligned_size<NO_REF_T>();
|
|
||||||
//new(ptr) NO_REF_T(std::forward<T>(value));
|
|
||||||
std::memcpy(ptr, &value, sizeof(NO_REF_T));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
T pop()
|
|
||||||
{
|
|
||||||
using NO_REF_T = std::remove_cv_t<std::remove_reference_t<T>>;
|
|
||||||
static_assert(std::is_trivially_copyable_v<NO_REF_T> && "Type must be bitwise copyable!");
|
|
||||||
constexpr static auto TYPE_SIZE = aligned_size<NO_REF_T>();
|
|
||||||
|
|
||||||
while (head->used_bytes_in_block() == 0 && move_back());
|
|
||||||
if (empty())
|
|
||||||
throw std::runtime_error("Silly boi the stack is empty!");
|
|
||||||
|
|
||||||
if (head->used_bytes_in_block() < static_cast<blt::ptrdiff_t>(aligned_size<NO_REF_T>()))
|
|
||||||
throw std::runtime_error((std::string("Mismatched Types! Not enough space left in block! Bytes: ") += std::to_string(
|
|
||||||
head->used_bytes_in_block()) += " Size: " + std::to_string(sizeof(NO_REF_T))).c_str());
|
|
||||||
// make copy
|
|
||||||
NO_REF_T t = *reinterpret_cast<NO_REF_T*>(head->metadata.offset - TYPE_SIZE);
|
|
||||||
// call destructor
|
|
||||||
if constexpr (detail::has_func_drop_v<T>)
|
|
||||||
call_drop<NO_REF_T>(0, 0, nullptr);
|
|
||||||
// move offset back
|
|
||||||
head->metadata.offset -= TYPE_SIZE;
|
|
||||||
// moving back allows us to allocate with other data, if there is room.
|
|
||||||
while (head->used_bytes_in_block() == 0 && move_back());
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
|
||||||
T& from(blt::size_t bytes)
|
|
||||||
{
|
|
||||||
using NO_REF_T = std::remove_cv_t<std::remove_reference_t<T>>;
|
|
||||||
|
|
||||||
constexpr static auto TYPE_SIZE = aligned_size<NO_REF_T>();
|
|
||||||
|
|
||||||
auto remaining_bytes = static_cast<blt::ptrdiff_t>(bytes + TYPE_SIZE);
|
|
||||||
|
|
||||||
block* blk = head;
|
|
||||||
while (remaining_bytes > 0)
|
|
||||||
{
|
|
||||||
if (blk == nullptr)
|
|
||||||
{
|
|
||||||
BLT_WARN_STREAM << "Stack state: " << size() << "\n";
|
|
||||||
BLT_WARN_STREAM << "Requested " << bytes << " bytes which becomes " << (bytes + TYPE_SIZE) << "\n";
|
|
||||||
throw std::runtime_error("Requested size is beyond the scope of this stack!");
|
|
||||||
}
|
|
||||||
|
|
||||||
auto bytes_available = blk->used_bytes_in_block() - remaining_bytes;
|
|
||||||
|
|
||||||
if (bytes_available < 0)
|
|
||||||
{
|
|
||||||
remaining_bytes -= blk->used_bytes_in_block();
|
|
||||||
blk = blk->metadata.prev;
|
|
||||||
} else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (blk == nullptr)
|
|
||||||
throw std::runtime_error("Some nonsense is going on. This function already smells");
|
|
||||||
if (blk->used_bytes_in_block() < static_cast<blt::ptrdiff_t>(TYPE_SIZE))
|
|
||||||
{
|
|
||||||
BLT_WARN_STREAM << size() << "\n";
|
|
||||||
BLT_WARN_STREAM << "Requested " << bytes << " bytes which becomes " << (bytes + TYPE_SIZE) << "\n";
|
|
||||||
BLT_WARN_STREAM << "Block size: " << blk->storage_size() << "\n";
|
|
||||||
BLT_ABORT((std::string("Mismatched Types! Not enough space left in block! Bytes: ") += std::to_string(
|
|
||||||
blk->used_bytes_in_block()) += " Size: " + std::to_string(sizeof(NO_REF_T))).c_str());
|
|
||||||
}
|
|
||||||
return *reinterpret_cast<NO_REF_T*>(blk->metadata.offset - remaining_bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Discards `bytes` bytes from the top of the stack, emptying and stepping back over blocks as needed.
 * Aborts when the stack is empty or holds fewer bytes than requested.
 * @param bytes number of bytes to discard; zero is a no-op
 */
void pop_bytes(blt::ptrdiff_t bytes)
{
    if (bytes == 0)
        return;
    if (empty())
    {
        BLT_WARN("Cannot pop %ld bytes", bytes);
        BLT_ABORT("Stack is empty, we cannot pop!");
    }
    while (bytes > 0)
    {
        if (head == nullptr)
        {
            BLT_WARN("The head is null, this stack doesn't contain enough data inside to pop %ld bytes!", bytes);
            BLT_WARN_STREAM << "Stack State: " << size() << "\n";
            BLT_ABORT("Stack doesn't contain enough data to preform a pop!");
        }
        const auto used = head->used_bytes_in_block();
        if (used >= bytes)
        {
            // the rest of the request fits inside this block
            head->metadata.offset -= bytes;
            break;
        }
        // this block cannot satisfy the request: empty it and continue in the previous block
        bytes -= used;
        head->metadata.offset = head->buffer;
        // move_back() leaves head unchanged when there is no previous block, so without this
        // check the loop would spin forever on an emptied first block
        if (!move_back())
        {
            BLT_WARN("The head is null, this stack doesn't contain enough data inside to pop %ld bytes!", bytes);
            BLT_WARN_STREAM << "Stack State: " << size() << "\n";
            BLT_ABORT("Stack doesn't contain enough data to preform a pop!");
        }
    }
    while (head != nullptr && head->used_bytes_in_block() == 0 && move_back());
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Warning this function should be used to transfer types, not arrays of types! It will produce an error if you attempt to pass more
|
|
||||||
* than one type # of bytes at a time!
|
|
||||||
* @param to stack to push to
|
|
||||||
* @param bytes number of bytes to transfer out.
|
|
||||||
*/
|
|
||||||
void transfer_bytes(stack_allocator& to, blt::size_t bytes)
|
|
||||||
{
|
|
||||||
while (head->used_bytes_in_block() == 0 && move_back());
|
|
||||||
if (empty())
|
|
||||||
throw std::runtime_error("This stack is empty!");
|
|
||||||
|
|
||||||
auto type_size = aligned_size(bytes);
|
|
||||||
if (head->used_bytes_in_block() < static_cast<blt::ptrdiff_t>(type_size))
|
|
||||||
{
|
|
||||||
BLT_ERROR_STREAM << "Stack State:\n" << size() << "\n" << "Bytes in head: " << bytes_in_head() << "\n";
|
|
||||||
BLT_ABORT(("This stack doesn't contain enough data for this type! " + std::to_string(head->used_bytes_in_block()) + " / " +
|
|
||||||
std::to_string(bytes) + " This is an invalid runtime state!").c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
auto ptr = to.allocate_bytes(type_size);
|
|
||||||
to.head->metadata.offset = static_cast<blt::u8*>(ptr) + type_size;
|
|
||||||
std::memcpy(ptr, head->metadata.offset - type_size, type_size);
|
|
||||||
head->metadata.offset -= type_size;
|
|
||||||
while (head->used_bytes_in_block() == 0 && move_back());
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename... Args>
|
|
||||||
void call_destructors(detail::bitmask_t* mask)
|
|
||||||
{
|
|
||||||
if constexpr (sizeof...(Args) > 0) {
|
|
||||||
blt::size_t offset = (stack_allocator::aligned_size<NO_REF_T<Args>>() + ...) -
|
|
||||||
stack_allocator::aligned_size<NO_REF_T<typename blt::meta::arg_helper<Args...>::First>>();
|
|
||||||
blt::size_t index = 0;
|
|
||||||
if (mask != nullptr)
|
|
||||||
index = mask->size() - sizeof...(Args);
|
|
||||||
((call_drop<Args>(offset, index, mask), offset -= stack_allocator::aligned_size<NO_REF_T<Args>>(), ++index), ...);
|
|
||||||
if (mask != nullptr)
|
|
||||||
{
|
|
||||||
auto& mask_r = *mask;
|
|
||||||
for (blt::size_t i = 0; i < sizeof...(Args); i++)
|
|
||||||
mask_r.pop_back();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] bool empty() const noexcept
|
|
||||||
{
|
|
||||||
if (head == nullptr)
|
|
||||||
return true;
|
|
||||||
if (head->metadata.prev != nullptr)
|
|
||||||
return false;
|
|
||||||
return head->used_bytes_in_block() == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * @return number of bytes used in the head block, or 0 when there is no head block
 */
[[nodiscard]] blt::ptrdiff_t bytes_in_head() const noexcept
{
    return head != nullptr ? head->used_bytes_in_block() : 0;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Warning this function is slow!
|
|
||||||
* @return the size of the stack allocator in bytes
|
|
||||||
*/
|
|
||||||
[[nodiscard]] size_data_t size() const noexcept
|
|
||||||
{
|
|
||||||
size_data_t size_data;
|
|
||||||
auto* prev = head;
|
|
||||||
while (prev != nullptr)
|
|
||||||
{
|
|
||||||
size_data.total_size_bytes += prev->metadata.size;
|
|
||||||
size_data.total_no_meta_bytes += prev->storage_size();
|
|
||||||
size_data.total_remaining_bytes += prev->remaining_bytes_in_block();
|
|
||||||
size_data.total_used_bytes += prev->used_bytes_in_block();
|
|
||||||
size_data.blocks++;
|
|
||||||
prev = prev->metadata.prev;
|
|
||||||
}
|
|
||||||
if (head != nullptr)
|
|
||||||
{
|
|
||||||
auto next = head->metadata.next;
|
|
||||||
while (next != nullptr)
|
|
||||||
{
|
|
||||||
size_data.total_dealloc += next->metadata.size;
|
|
||||||
size_data.total_dealloc_no_meta += next->storage_size();
|
|
||||||
size_data.total_dealloc_remaining += next->remaining_bytes_in_block();
|
|
||||||
size_data.total_dealloc_used += next->used_bytes_in_block();
|
|
||||||
size_data.blocks++;
|
|
||||||
next = next->metadata.next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return size_data;
|
|
||||||
}
|
|
||||||
|
|
||||||
stack_allocator() = default;
|
|
||||||
|
|
||||||
// TODO: cleanup this allocator!
|
|
||||||
// if you keep track of type size information you can memcpy between stack allocators as you already only allow trivially copyable types
|
|
||||||
stack_allocator(const stack_allocator& copy) noexcept
|
|
||||||
{
|
|
||||||
if (copy.empty())
|
|
||||||
return;
|
|
||||||
|
|
||||||
head = nullptr;
|
|
||||||
block* list_itr = nullptr;
|
|
||||||
|
|
||||||
// start at the beginning of the list
|
|
||||||
block* current = copy.head;
|
|
||||||
while (current != nullptr)
|
|
||||||
{
|
|
||||||
list_itr = current;
|
|
||||||
current = current->metadata.prev;
|
|
||||||
}
|
|
||||||
// copy all the blocks
|
|
||||||
while (list_itr != nullptr)
|
|
||||||
{
|
|
||||||
push_block(list_itr->metadata.size);
|
|
||||||
std::memcpy(head->buffer, list_itr->buffer, list_itr->storage_size());
|
|
||||||
head->metadata.size = list_itr->metadata.size;
|
|
||||||
head->metadata.offset = head->buffer + list_itr->used_bytes_in_block();
|
|
||||||
list_itr = list_itr->metadata.next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stack_allocator& operator=(const stack_allocator& copy) = delete;
|
|
||||||
|
|
||||||
/**
 * Move constructor: takes over the block chain of `move` and leaves it without a head block.
 */
stack_allocator(stack_allocator&& move) noexcept: head(std::exchange(move.head, nullptr))
{}
|
|
||||||
|
|
||||||
/**
 * Move assignment: swaps head pointers with `move`, so the blocks previously owned here end up in `move`.
 */
stack_allocator& operator=(stack_allocator&& move) noexcept
{
    std::swap(head, move.head);
    return *this;
}
|
|
||||||
|
|
||||||
/**
 * Frees every block: first the blocks that follow head, then head and everything before it.
 */
~stack_allocator() noexcept
{
    block* spare = (head != nullptr) ? head->metadata.next : nullptr;
    while (spare != nullptr)
    {
        // read the link before the block is released
        block* const following = spare->metadata.next;
        free_block(spare);
        spare = following;
    }
    free_chain(head);
}
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static inline constexpr blt::size_t aligned_size() noexcept
|
static inline constexpr blt::size_t aligned_size() noexcept
|
||||||
{
|
{
|
||||||
|
@ -484,83 +77,218 @@ namespace blt::gp
|
||||||
return (size + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
|
return (size + (MAX_ALIGNMENT - 1)) & ~(MAX_ALIGNMENT - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: metadata_size() returned sizeof(block::block_metadata_t).
inline static constexpr auto metadata_size() noexcept
|
stack_allocator() = default;
|
||||||

|

|
// Added side, copy constructor: stays empty when the source has no buffer or no stored bytes,
// otherwise calls expand(copy.size_) and copies the first copy.bytes_stored bytes.
// NOTE(review): expand() goes through to_nearest_page_size(), which adds a page even to an exact
// multiple, so a copy appears to end up one page larger than its source. Confirm this is intended.
stack_allocator(const stack_allocator& copy)
|
||||||
{
|
{
|
||||||
return sizeof(typename block::block_metadata_t);
|
if (copy.data_ == nullptr || copy.bytes_stored == 0)
|
||||||
|
return;
|
||||||
|
expand(copy.size_);
|
||||||
|
std::memcpy(data_, copy.data_, copy.bytes_stored);
|
||||||
|
bytes_stored = copy.bytes_stored;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: block_size() returned sizeof(block).
inline static constexpr auto block_size() noexcept
|
// Added side, move constructor: takes the buffer, byte count and capacity of `move`.
// All three members of `move` are reset so it is left as a consistent empty stack; copying the
// counters while nulling only data_ would leave it claiming stored bytes with no buffer behind them.
stack_allocator(stack_allocator&& move) noexcept:
|
||||||
|
data_(std::exchange(move.data_, nullptr)), bytes_stored(std::exchange(move.bytes_stored, 0)), size_(std::exchange(move.size_, 0))
|
||||||
|
{}
|
||||||
|

|
|
stack_allocator& operator=(const stack_allocator& copy) = delete;
|
||||||
|

|
|
// Added side, move assignment: swaps all three members with `move`.
stack_allocator& operator=(stack_allocator&& move) noexcept
|
||||||
{
|
{
|
||||||
return sizeof(block);
|
data_ = std::exchange(move.data_, data_);
|
||||||
|
size_ = std::exchange(move.size_, size_);
|
||||||
|
bytes_stored = std::exchange(move.bytes_stored, bytes_stored);
|
||||||
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: page_size() returned PAGE_SIZE.
inline static constexpr auto page_size() noexcept
|
// Added side, destructor: releases the single buffer with std::free.
~stack_allocator()
|
||||||
{
|
{
|
||||||
return PAGE_SIZE;
|
std::free(data_);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: page_size_no_meta() returned page_size() - metadata_size().
inline static constexpr auto page_size_no_meta() noexcept
|
// Added side, insert(): appends all stored bytes of `stack` to the top of this stack,
// calling expand() first when the combined byte count exceeds size_. `stack` is not modified.
void insert(const stack_allocator& stack)
|
||||||
{
|
{
|
||||||
return page_size() - metadata_size();
|
if (stack.empty())
|
||||||
|
return;
|
||||||
|
if (size_ < stack.bytes_stored + bytes_stored)
|
||||||
|
expand(stack.bytes_stored + bytes_stored);
|
||||||
|
std::memcpy(data_ + bytes_stored, stack.data_, stack.bytes_stored);
|
||||||
|
bytes_stored += stack.bytes_stored;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: page_size_no_block() returned page_size() - block_size().
inline static constexpr auto page_size_no_block() noexcept
|
// Added side, copy_from(stack, bytes): appends the top `bytes` bytes of `stack` to this stack,
// calling expand() first when they do not fit in size_.
// NOTE(review): nothing here checks bytes <= stack.bytes_stored; a larger value makes the source
// offset wrap around. Confirm callers guarantee it.
void copy_from(const stack_allocator& stack, blt::size_t bytes)
|
||||||
{
|
{
|
||||||
return page_size() - block_size();
|
if (bytes == 0)
|
||||||
|
return;
|
||||||
|
if (size_ < bytes + bytes_stored)
|
||||||
|
expand(bytes + bytes_stored);
|
||||||
|
std::memcpy(data_ + bytes_stored, stack.data_ + (stack.bytes_stored - bytes), bytes);
|
||||||
|
bytes_stored += bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side (first line of each pair): start of struct block and its block_metadata_t
// (size, next, prev, offset).
private:
|
// Added side, copy_from(data, bytes): appends `bytes` raw bytes to the top of the stack,
// calling expand() first when they do not fit in size_. A null pointer or zero length is a no-op.
void copy_from(blt::u8* data, blt::size_t bytes)
|
||||||
struct block
|
|
||||||
{
|
{
|
||||||
struct block_metadata_t
|
if (bytes == 0 || data == nullptr)
|
||||||
{
|
return;
|
||||||
blt::size_t size = 0;
|
if (size_ < bytes + bytes_stored)
|
||||||
block* next = nullptr;
|
expand(bytes + bytes_stored);
|
||||||
block* prev = nullptr;
|
std::memcpy(data_ + bytes_stored, data, bytes);
|
||||||
blt::u8* offset = nullptr;
|
bytes_stored += bytes;
|
||||||
} metadata;
|
}
|
||||||
// Removed side: the block's buffer member and the signature of its constructor.
blt::u8 buffer[8]{};
|
|
||||||

|

explicit block(blt::size_t size) noexcept
|
// Added side, copy_to(data, bytes): copies the top `bytes` bytes of the stack into `data` without
// removing them. A null pointer or zero length is a no-op. The function only reads members, so it
// is const-qualified, as the removed version was.
void copy_to(blt::u8* data, blt::size_t bytes) const
|
||||||
|
{
|
||||||
|
if (bytes == 0 || data == nullptr)
|
||||||
|
return;
|
||||||
|
std::memcpy(data, data_ + (bytes_stored - bytes), bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename NO_REF = NO_REF_T<T>>
|
||||||
|
void push(const T& t)
|
||||||
|
{
|
||||||
|
static_assert(std::is_trivially_copyable_v<NO_REF> && "Type must be bitwise copyable!");
|
||||||
|
static_assert(alignof(NO_REF) <= MAX_ALIGNMENT && "Type alignment must not be greater than the max alignment!");
|
||||||
|
auto ptr = allocate_bytes_for_size(sizeof(NO_REF));
|
||||||
|
std::memcpy(ptr, &t, sizeof(NO_REF));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename NO_REF = NO_REF_T<T>>
|
||||||
|
T pop()
|
||||||
|
{
|
||||||
|
static_assert(std::is_trivially_copyable_v<NO_REF> && "Type must be bitwise copyable!");
|
||||||
|
static_assert(alignof(NO_REF) <= MAX_ALIGNMENT && "Type alignment must not be greater than the max alignment!");
|
||||||
|
constexpr auto size = aligned_size(sizeof(NO_REF));
|
||||||
|
#if BLT_DEBUG_LEVEL > 0
|
||||||
|
if (bytes_stored < size)
|
||||||
|
BLT_ABORT("Not enough bytes left to pop!");
|
||||||
|
#endif
|
||||||
|
bytes_stored -= size;
|
||||||
|
return *reinterpret_cast<T*>(data_ + bytes_stored);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename NO_REF = NO_REF_T<T>>
|
||||||
|
T& from(blt::size_t bytes)
|
||||||
|
{
|
||||||
|
static_assert(std::is_trivially_copyable_v<NO_REF> && "Type must be bitwise copyable!");
|
||||||
|
static_assert(alignof(NO_REF) <= MAX_ALIGNMENT && "Type alignment must not be greater than the max alignment!");
|
||||||
|
auto size = aligned_size(sizeof(NO_REF)) + bytes;
|
||||||
|
#if BLT_DEBUG_LEVEL > 0
|
||||||
|
if (bytes_stored < size)
|
||||||
|
BLT_ABORT(("Not enough bytes in stack to reference " + std::to_string(size) + " bytes requested but " + std::to_string(bytes) +
|
||||||
|
" bytes stored!").c_str());
|
||||||
|
#endif
|
||||||
|
return *reinterpret_cast<NO_REF*>(data_ + bytes_stored - size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Added side, pop_bytes(bytes): discards `bytes` bytes from the top of the stack. When
// BLT_DEBUG_LEVEL > 0 it aborts if fewer bytes are stored; the message now reports bytes_stored
// rather than repeating the requested amount.
// Removed side (first line of each pair): body of the block constructor, which aborted for sizes
// below PAGE_SIZE and set metadata.size and metadata.offset.
void pop_bytes(blt::size_t bytes)
|
||||||
{
|
{
|
||||||
#if BLT_DEBUG_LEVEL > 0
|
#if BLT_DEBUG_LEVEL > 0
|
||||||
if (size < PAGE_SIZE)
|
if (bytes_stored < bytes)
|
||||||
{
|
BLT_ABORT(("Not enough bytes in stack to pop " + std::to_string(bytes) + " bytes requested but " + std::to_string(bytes_stored) +
|
||||||
BLT_WARN("Hey this block is too small, who allocated it?");
|
" bytes stored!").c_str());
|
||||||
std::abort();
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
metadata.size = size;
|
bytes_stored -= bytes;
|
||||||
metadata.offset = buffer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: block::reset() set metadata.offset back to buffer.
void reset() noexcept
|
// Added side, transfer_bytes(to, bytes): copies aligned_size(bytes) bytes from the top of this
// stack onto `to`, then removes them here. The debug check and the pop use the same aligned
// amount that is copied, so an unaligned `bytes` cannot leave part of the object behind. The
// debug message reports bytes_stored rather than repeating the requested amount.
void transfer_bytes(stack_allocator& to, blt::size_t bytes)
|
||||||
{
|
{
|
||||||
metadata.offset = buffer;
|
#if BLT_DEBUG_LEVEL > 0
|
||||||
|
if (bytes_stored < aligned_size(bytes))
|
||||||
|
BLT_ABORT(("Not enough bytes in stack to transfer " + std::to_string(bytes) + " bytes requested but " + std::to_string(bytes_stored) +
|
||||||
|
" bytes stored!").c_str());
|
||||||
|
#endif
|
||||||
|
to.copy_from(*this, aligned_size(bytes));
|
||||||
|
pop_bytes(aligned_size(bytes));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: block::storage_size() returned metadata.size minus the size of the metadata struct.
[[nodiscard]] blt::ptrdiff_t storage_size() const noexcept
|
// Added side, call_destructors<Args...>(mask): calls call_drop() for each type in Args, then
// removes sizeof...(Args) entries from the back of `mask` when it is not null.
// The offset starts at the combined aligned size and each argument's own size is subtracted
// before its drop. Subtracting the current argument's size to reach the next one only lands on
// the right object when every aligned size is equal.
template<typename... Args>
|
||||||
|
void call_destructors(detail::bitmask_t* mask)
|
||||||
{
|
{
|
||||||
return static_cast<blt::ptrdiff_t>(metadata.size - sizeof(typename block::block_metadata_t));
|
if constexpr (sizeof...(Args) > 0)
|
||||||
|
{
|
||||||
|
blt::size_t offset =
|
||||||
|
(stack_allocator::aligned_size(sizeof(NO_REF_T<Args>)) + ...);
|
||||||
|
blt::size_t index = 0;
|
||||||
|
if (mask != nullptr)
|
||||||
|
index = mask->size() - sizeof...(Args);
|
||||||
|
((offset -= stack_allocator::aligned_size(sizeof(NO_REF_T<Args>)), call_drop<Args>(offset, index, mask), ++index), ...);
|
||||||
|
if (mask != nullptr)
|
||||||
|
{
|
||||||
|
auto& mask_r = *mask;
|
||||||
|
for (blt::size_t i = 0; i < sizeof...(Args); i++)
|
||||||
|
mask_r.pop_back();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: block::used_bytes_in_block() returned the distance from buffer to metadata.offset.
[[nodiscard]] blt::ptrdiff_t used_bytes_in_block() const noexcept
|
// Added side, empty(): true when no bytes are stored.
[[nodiscard]] bool empty() const noexcept
|
||||||
{
|
{
|
||||||
return static_cast<blt::ptrdiff_t>(metadata.offset - buffer);
|
return bytes_stored == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side: per-block free space, storage_size() minus used_bytes_in_block().
[[nodiscard]] blt::ptrdiff_t remaining_bytes_in_block() const noexcept
|
// Added side: free space in the single buffer, size_ minus bytes_stored, as a signed value.
[[nodiscard]] blt::ptrdiff_t remaining_bytes_in_block() const noexcept
|
||||||
{
|
{
|
||||||
return storage_size() - used_bytes_in_block();
|
return static_cast<blt::ptrdiff_t>(size_ - bytes_stored);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
// Removed side: copy_start_point groups the block, byte count and pointer that
// get_start_from_bytes() returns.
struct copy_start_point
|
// Added side, bytes_in_head(): returns bytes_stored as a signed value.
[[nodiscard]] blt::ptrdiff_t bytes_in_head() const noexcept
|
||||||
{
|
{
|
||||||
block* start_block;
|
return static_cast<blt::ptrdiff_t>(bytes_stored);
|
||||||
blt::ptrdiff_t bytes_left;
|
}
|
||||||
blt::u8* start_point;
|

|
};
|
/**
 * @return a size_data_t with the used bytes, the total buffer size and the remaining bytes filled in
 */
[[nodiscard]] size_data_t size() const noexcept
{
    size_data_t report;

    report.total_used_bytes = bytes_stored;
    report.total_size_bytes = size_;
    report.total_remaining_bytes = remaining_bytes_in_block();

    return report;
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
 * Replaces the buffer with a new one of at least `bytes` bytes (rounded with to_nearest_page_size())
 * and copies the stored bytes across.
 * @param bytes requested total capacity, not an increment
 * @throws std::bad_alloc when the new buffer cannot be allocated; the stack is left unchanged
 */
void expand(blt::size_t bytes)
{
    // the stored bytes are copied into the new buffer, so it must never be smaller than them
    if (bytes < bytes_stored)
        bytes = bytes_stored;
    bytes = to_nearest_page_size(bytes);
    auto new_data = static_cast<blt::u8*>(std::malloc(bytes));
    if (new_data == nullptr)
        throw std::bad_alloc();
    if (bytes_stored > 0)
        std::memcpy(new_data, data_, bytes_stored);
    std::free(data_);
    data_ = new_data;
    size_ = bytes;
}
|
||||||
|
|
||||||
|
/**
 * Drops the part of `bytes` below a PAGE_SIZE boundary and adds one PAGE_SIZE.
 * A value that is already an exact multiple therefore grows by a full page.
 * NOTE(review): the mask assumes PAGE_SIZE is a power of two. Confirm against its definition.
 */
static size_t to_nearest_page_size(blt::size_t bytes) noexcept
{
    constexpr static blt::size_t PAGE_MASK = ~(PAGE_SIZE - 1);
    const blt::size_t whole_pages = bytes & PAGE_MASK;
    return whole_pages + PAGE_SIZE;
}
|
||||||
|
|
||||||
|
/**
 * Finds a MAX_ALIGNMENT-aligned position for `bytes` bytes at the current top of the buffer.
 * @return the aligned pointer, or nullptr when there is no buffer or std::align finds no room
 */
void* get_aligned_pointer(blt::size_t bytes) noexcept
{
    if (data_ == nullptr)
        return nullptr;
    void* top = data_ + bytes_stored;
    blt::size_t space = remaining_bytes_in_block();
    return std::align(MAX_ALIGNMENT, bytes, top, space);
}
|
||||||
|
|
||||||
|
/**
 * Reserves aligned_size(bytes) bytes at the top of the stack, growing the buffer when needed.
 * @param bytes size of the object to place
 * @return aligned pointer to the reserved space
 * @throws std::bad_alloc when no aligned position is available even after growing
 */
void* allocate_bytes_for_size(blt::size_t bytes)
{
    auto aligned_ptr = get_aligned_pointer(bytes);
    if (aligned_ptr == nullptr)
    {
        // expand() takes the new total capacity, so the request has to include what is already
        // stored; asking for the new object's size alone could produce a buffer smaller than
        // the data being carried over
        expand(bytes_stored + bytes + MAX_ALIGNMENT);
        aligned_ptr = get_aligned_pointer(bytes);
    }
    if (aligned_ptr == nullptr)
        throw std::bad_alloc();
    auto used_bytes = aligned_size(bytes);
    bytes_stored += used_bytes;
    return aligned_ptr;
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline void call_drop(blt::size_t offset, blt::size_t index, detail::bitmask_t* mask)
|
inline void call_drop(blt::size_t offset, blt::size_t index, detail::bitmask_t* mask)
|
||||||
|
@ -573,145 +301,14 @@ namespace blt::gp
|
||||||
if (!mask_r[index])
|
if (!mask_r[index])
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
from<NO_REF_T<T>>(offset).drop();
|
from<NO_REF_T<T >>(offset).drop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removed side (first line of each pair): allocate_bytes<T>() forwarded sizeof(NO_REF_T<T>) to
// allocate_bytes(size). Added side: the three data members of the single-buffer allocator.
template<typename T>
|
// start of the buffer; null until expand() has run
blt::u8* data_ = nullptr;
|
||||||
void* allocate_bytes()
|
// place in the data_ array which has a free spot.
|
||||||
{
|
blt::size_t bytes_stored = 0;
|
||||||
return allocate_bytes(sizeof(NO_REF_T<T>));
|
// capacity of data_ in bytes, as set by expand()
blt::size_t size_ = 0;
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Returns an aligned position for `size` bytes in the head block, moving to or allocating another block when head has no room.
 * The caller is responsible for advancing the head offset.
 * @throws std::bad_alloc when no aligned position is found
 */
void* allocate_bytes(blt::size_t size)
{
    void* aligned = get_aligned_pointer(size);
    if (aligned == nullptr)
    {
        // head is missing or full: bring a suitable block to head and look again
        allocate_block_to_head_for_size(aligned_size(size));
        aligned = get_aligned_pointer(size);
    }
    if (aligned == nullptr)
        throw std::bad_alloc();
    return aligned;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Moves forward through the list of "deallocated" blocks, if none meet size requirements it'll allocate a new block.
|
|
||||||
* This function will take into account the size of the block metadata, but requires the size input to be aligned.
|
|
||||||
* It will perform no modification to the size value.
|
|
||||||
*
|
|
||||||
* The block which allows for size is now at head.
|
|
||||||
*/
|
|
||||||
void allocate_block_to_head_for_size(const blt::size_t size) noexcept
|
|
||||||
{
|
|
||||||
while (head != nullptr && head->metadata.next != nullptr)
|
|
||||||
{
|
|
||||||
head = head->metadata.next;
|
|
||||||
if (head != nullptr)
|
|
||||||
head->reset();
|
|
||||||
if (head->remaining_bytes_in_block() >= static_cast<blt::ptrdiff_t>(size))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (head == nullptr || head->remaining_bytes_in_block() < static_cast<blt::ptrdiff_t>(size))
|
|
||||||
push_block(size + sizeof(typename block::block_metadata_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Finds a MAX_ALIGNMENT-aligned position for `bytes` bytes at the write position of the head block.
 * @return the aligned pointer, or nullptr when there is no head block or std::align finds no room
 */
void* get_aligned_pointer(blt::size_t bytes) noexcept
{
    if (head == nullptr)
        return nullptr;
    void* write_position = head->metadata.offset;
    blt::size_t space = head->remaining_bytes_in_block();
    return std::align(MAX_ALIGNMENT, bytes, write_position, space);
}
|
|
||||||
|
|
||||||
/**
 * Allocates a block for `size` bytes, links it after the current head (when there is one) and makes it the new head.
 */
void push_block(blt::size_t size) noexcept
{
    auto* const fresh = allocate_block(size);
    if (head != nullptr)
    {
        head->metadata.next = fresh;
        fresh->metadata.prev = head;
    }
    head = fresh;
}
|
|
||||||
|
|
||||||
/**
 * Drops the part of `bytes` below a PAGE_SIZE boundary and adds one PAGE_SIZE.
 * A value that is already an exact multiple therefore grows by a full page.
 * NOTE(review): the mask assumes PAGE_SIZE is a power of two. Confirm against its definition.
 */
static size_t to_nearest_page_size(blt::size_t bytes) noexcept
{
    constexpr static blt::size_t PAGE_MASK = ~(PAGE_SIZE - 1);
    const blt::size_t whole_pages = bytes & PAGE_MASK;
    return whole_pages + PAGE_SIZE;
}
|
|
||||||
|
|
||||||
/**
 * Allocates PAGE_SIZE-aligned storage for at least `bytes` bytes (rounded with to_nearest_page_size())
 * and constructs a block in it. Aborts when the allocation fails.
 * @return pointer to the constructed block
 */
static block* allocate_block(blt::size_t bytes) noexcept
{
    const auto size = to_nearest_page_size(bytes);
    void* const storage = std::aligned_alloc(PAGE_SIZE, size);
    //auto* data = get_allocator().allocate(size);
    // this function is noexcept and cannot report failure; constructing into a null pointer would be undefined
    if (storage == nullptr)
    {
        BLT_WARN("Unable to allocate memory for a new stack block!");
        std::abort();
    }
    return new(storage) block{size};
}
|
|
||||||
|
|
||||||
/**
 * Frees `current` and every block reachable from it through the prev links.
 */
static void free_chain(block* current) noexcept
{
    for (block* doomed = current; doomed != nullptr;)
    {
        // read the link before the block is released
        block* const earlier = doomed->metadata.prev;
        free_block(doomed);
        //get_allocator().deallocate(ptr);
        doomed = earlier;
    }
}
|
|
||||||
|
|
||||||
/**
 * Releases the storage of a single block with std::free.
 */
static void free_block(block* ptr) noexcept
{
    void* const storage = ptr;
    std::free(storage);
}
|
|
||||||
|
|
||||||
inline bool move_back() noexcept
|
|
||||||
{
|
|
||||||
auto old = head;
|
|
||||||
head = head->metadata.prev;
|
|
||||||
if (head == nullptr)
|
|
||||||
{
|
|
||||||
head = old;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Locates the position `bytes` bytes below the top of `stack`, walking from its head to previous blocks.
 * Aborts when the stack runs out of blocks first.
 * @return the block containing that position, the number of bytes to take from that block, and a pointer to the position
 */
[[nodiscard]] inline static copy_start_point get_start_from_bytes(const stack_allocator& stack, blt::size_t bytes)
{
    block* cursor = stack.head;
    auto remaining = static_cast<blt::ptrdiff_t>(bytes);
    blt::u8* origin = nullptr;
    while (remaining > 0)
    {
        if (cursor == nullptr)
        {
            BLT_WARN("This stack doesn't contain enough space to copy %ld bytes!", bytes);
            BLT_WARN_STREAM << "State: " << stack.size() << "\n";
            BLT_ABORT("Stack doesn't contain enough data for this copy operation!");
        }
        const auto used = cursor->used_bytes_in_block();
        if (used < remaining)
        {
            // the whole block is included; keep looking further back
            remaining -= used;
            cursor = cursor->metadata.prev;
            continue;
        }
        // the start lies in this block: its very beginning when the counts match, otherwise part-way in
        origin = (used == remaining) ? cursor->buffer : cursor->metadata.offset - remaining;
        break;
    }
    return copy_start_point{cursor, remaining, origin};
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
block* head = nullptr;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
2
lib/blt
2
lib/blt
|
@ -1 +1 @@
|
||||||
Subproject commit 941aa6809c92f05c64ca6624d5898958cfac496d
|
Subproject commit 97990401e2332276b5397060a3ccaf19f07fb999
|
|
@ -0,0 +1,41 @@
|
||||||
|
Performance counter stats for './cmake-build-release/blt-symbolic-regression-example' (30 runs):
|
||||||
|
|
||||||
|
81,986,993,284 branches ( +- 15.89% ) (19.93%)
|
||||||
|
194,632,894 branch-misses # 0.24% of all branches ( +- 21.10% ) (19.84%)
|
||||||
|
32,561,539 cache-misses # 0.89% of all cache refs ( +- 10.21% ) (19.95%)
|
||||||
|
3,645,509,810 cache-references ( +- 15.93% ) (20.11%)
|
||||||
|
169,957,442,648 cycles ( +- 15.85% ) (20.26%)
|
||||||
|
426,558,894,577 instructions # 2.51 insn per cycle ( +- 16.24% ) (20.29%)
|
||||||
|
0 alignment-faults
|
||||||
|
9,103 cgroup-switches ( +- 13.62% )
|
||||||
|
52,586 faults ( +- 5.74% )
|
||||||
|
1,823,320,688 ns duration_time ( +- 12.76% )
|
||||||
|
41,213,439,537 ns user_time ( +- 3.68% )
|
||||||
|
219,435,124 ns system_time ( +- 2.44% )
|
||||||
|
132,928,139,347 L1-dcache-loads ( +- 15.55% ) (20.40%)
|
||||||
|
2,559,138,346 L1-dcache-load-misses # 1.93% of all L1-dcache accesses ( +- 15.53% ) (20.37%)
|
||||||
|
852,474,938 L1-dcache-prefetches ( +- 19.61% ) (20.44%)
|
||||||
|
1,035,909,753 L1-icache-loads ( +- 11.73% ) (20.45%)
|
||||||
|
1,451,589 L1-icache-load-misses # 0.14% of all L1-icache accesses ( +- 13.61% ) (20.50%)
|
||||||
|
37,722,800 dTLB-loads ( +- 14.93% ) (20.52%)
|
||||||
|
4,119,243 dTLB-load-misses # 10.92% of all dTLB cache accesses ( +- 10.99% ) (20.55%)
|
||||||
|
1,318,136 iTLB-loads ( +- 20.32% ) (20.51%)
|
||||||
|
367,939 iTLB-load-misses # 27.91% of all iTLB cache accesses ( +- 12.34% ) (20.42%)
|
||||||
|
2,730,214,946 l2_request_g1.all_no_prefetch ( +- 15.32% ) (20.43%)
|
||||||
|
52,586 page-faults ( +- 5.74% )
|
||||||
|
52,583 page-faults:u ( +- 5.75% )
|
||||||
|
3 page-faults:k ( +- 3.96% )
|
||||||
|
132,786,226,560 L1-dcache-loads ( +- 15.54% ) (20.33%)
|
||||||
|
2,581,181,694 L1-dcache-load-misses # 1.94% of all L1-dcache accesses ( +- 15.34% ) (20.26%)
|
||||||
|
<not supported> LLC-loads
|
||||||
|
<not supported> LLC-load-misses
|
||||||
|
1,021,814,075 L1-icache-loads ( +- 11.67% ) (20.19%)
|
||||||
|
1,376,958 L1-icache-load-misses # 0.13% of all L1-icache accesses ( +- 13.76% ) (20.09%)
|
||||||
|
38,065,494 dTLB-loads ( +- 14.76% ) (20.09%)
|
||||||
|
4,174,010 dTLB-load-misses # 11.06% of all dTLB cache accesses ( +- 10.90% ) (20.14%)
|
||||||
|
1,407,386 iTLB-loads ( +- 20.45% ) (20.09%)
|
||||||
|
338,781 iTLB-load-misses # 25.70% of all iTLB cache accesses ( +- 12.61% ) (20.05%)
|
||||||
|
873,873,406 L1-dcache-prefetches ( +- 19.41% ) (20.00%)
|
||||||
|
<not supported> L1-dcache-prefetch-misses
|
||||||
|
|
||||||
|
1.823 +- 0.233 seconds time elapsed ( +- 12.76% )
|
|
@ -243,7 +243,7 @@ namespace blt::gp
|
||||||
vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(total_bytes_after + accumulate_type_sizes(begin_itr, end_itr)));
|
vals_r.pop_bytes(static_cast<blt::ptrdiff_t>(total_bytes_after + accumulate_type_sizes(begin_itr, end_itr)));
|
||||||
|
|
||||||
// insert the new tree then move back the data from after the original mutation point.
|
// insert the new tree then move back the data from after the original mutation point.
|
||||||
vals_r.insert(std::move(new_vals_r));
|
vals_r.insert(new_vals_r);
|
||||||
vals_r.copy_from(stack_after_data, total_bytes_after);
|
vals_r.copy_from(stack_after_data, total_bytes_after);
|
||||||
|
|
||||||
auto before = begin_itr - 1;
|
auto before = begin_itr - 1;
|
||||||
|
@ -252,7 +252,7 @@ namespace blt::gp
|
||||||
|
|
||||||
// this will check to make sure that the tree is in a correct and executable state. it requires that the evaluation is context free!
|
// this will check to make sure that the tree is in a correct and executable state. it requires that the evaluation is context free!
|
||||||
#if BLT_DEBUG_LEVEL >= 2
|
#if BLT_DEBUG_LEVEL >= 2
|
||||||
BLT_ASSERT(new_vals_r.empty());
|
// BLT_ASSERT(new_vals_r.empty());
|
||||||
//BLT_ASSERT(stack_after.empty());
|
//BLT_ASSERT(stack_after.empty());
|
||||||
blt::size_t bytes_expected = 0;
|
blt::size_t bytes_expected = 0;
|
||||||
auto bytes_size = vals_r.size().total_used_bytes;
|
auto bytes_size = vals_r.size().total_used_bytes;
|
||||||
|
@ -690,7 +690,7 @@ namespace blt::gp
|
||||||
vals.copy_from(from_ptr, from_bytes);
|
vals.copy_from(from_ptr, from_bytes);
|
||||||
vals.copy_from(after_ptr, after_to_bytes);
|
vals.copy_from(after_ptr, after_to_bytes);
|
||||||
|
|
||||||
static std::vector<op_container_t> op_copy;
|
static thread_local std::vector<op_container_t> op_copy;
|
||||||
op_copy.clear();
|
op_copy.clear();
|
||||||
op_copy.insert(op_copy.begin(), ops.begin() + from_child.start, ops.begin() + from_child.end);
|
op_copy.insert(op_copy.begin(), ops.begin() + from_child.start, ops.begin() + from_child.end);
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
Performance counter stats for './cmake-build-release/blt-symbolic-regression-example' (30 runs):
|
||||||
|
|
||||||
|
35,671,860,546 branches ( +- 5.05% ) (20.11%)
|
||||||
|
130,603,525 branch-misses # 0.37% of all branches ( +- 4.61% ) (20.67%)
|
||||||
|
43,684,408 cache-misses # 9.61% of all cache refs ( +- 3.08% ) (20.97%)
|
||||||
|
454,604,804 cache-references ( +- 4.53% ) (21.30%)
|
||||||
|
72,861,649,501 cycles ( +- 5.33% ) (22.00%)
|
||||||
|
170,811,735,018 instructions # 2.34 insn per cycle ( +- 5.59% ) (22.84%)
|
||||||
|
0 alignment-faults
|
||||||
|
33,002 cgroup-switches ( +- 1.71% )
|
||||||
|
293,932 faults ( +- 4.09% )
|
||||||
|
1,130,322,318 ns duration_time ( +- 3.73% )
|
||||||
|
16,750,942,537 ns user_time ( +- 1.71% )
|
||||||
|
1,165,192,903 ns system_time ( +- 0.87% )
|
||||||
|
57,551,179,178 L1-dcache-loads ( +- 5.63% ) (22.36%)
|
||||||
|
214,283,064 L1-dcache-load-misses # 0.37% of all L1-dcache accesses ( +- 5.58% ) (22.13%)
|
||||||
|
75,685,527 L1-dcache-prefetches ( +- 7.55% ) (22.07%)
|
||||||
|
1,115,360,458 L1-icache-loads ( +- 3.91% ) (21.67%)
|
||||||
|
2,868,754 L1-icache-load-misses # 0.26% of all L1-icache accesses ( +- 3.34% ) (21.34%)
|
||||||
|
65,107,178 dTLB-loads ( +- 8.94% ) (21.00%)
|
||||||
|
4,971,480 dTLB-load-misses # 7.64% of all dTLB cache accesses ( +- 3.70% ) (20.90%)
|
||||||
|
452,351 iTLB-loads ( +- 4.80% ) (20.62%)
|
||||||
|
1,600,933 iTLB-load-misses # 353.91% of all iTLB cache accesses ( +- 3.68% ) (20.62%)
|
||||||
|
332,075,460 l2_request_g1.all_no_prefetch ( +- 4.59% ) (20.73%)
|
||||||
|
293,932 page-faults ( +- 4.09% )
|
||||||
|
293,928 page-faults:u ( +- 4.09% )
|
||||||
|
3 page-faults:k ( +- 4.92% )
|
||||||
|
58,806,652,381 L1-dcache-loads ( +- 5.44% ) (20.61%)
|
||||||
|
216,591,223 L1-dcache-load-misses # 0.38% of all L1-dcache accesses ( +- 5.39% ) (21.02%)
|
||||||
|
<not supported> LLC-loads
|
||||||
|
<not supported> LLC-load-misses
|
||||||
|
1,059,748,012 L1-icache-loads ( +- 4.29% ) (21.55%)
|
||||||
|
2,615,017 L1-icache-load-misses # 0.23% of all L1-icache accesses ( +- 3.34% ) (21.85%)
|
||||||
|
65,917,126 dTLB-loads ( +- 8.89% ) (21.78%)
|
||||||
|
4,717,351 dTLB-load-misses # 7.25% of all dTLB cache accesses ( +- 3.52% ) (22.05%)
|
||||||
|
459,796 iTLB-loads ( +- 5.92% ) (21.77%)
|
||||||
|
1,512,986 iTLB-load-misses # 334.47% of all iTLB cache accesses ( +- 3.64% ) (21.26%)
|
||||||
|
74,656,433 L1-dcache-prefetches ( +- 7.94% ) (20.50%)
|
||||||
|
<not supported> L1-dcache-prefetch-misses
|
||||||
|
|
||||||
|
1.1303 +- 0.0422 seconds time elapsed ( +- 3.73% )
|
Loading…
Reference in New Issue