diff --git a/.gitignore b/.gitignore
index c1d1eb6..c0f2ba8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,7 +7,5 @@ out/
massif.*
callgrind.*
*.out.*
-<<<<<<< HEAD
heaptrack.*
-=======
->>>>>>> refs/remotes/origin/main
+Rice_Cammeo_Osmancik.arff
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f41377d..6ac753d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25)
-project(blt-gp VERSION 0.1.26)
+project(blt-gp VERSION 0.1.27)
include(CTest)
@@ -106,6 +106,7 @@ endmacro()
if (${BUILD_EXAMPLES})
blt_add_project(blt-symbolic-regression examples/symbolic_regression.cpp example)
+ blt_add_project(blt-rice-classification examples/rice_classification.cpp example)
endif ()
diff --git a/examples/operations_common.h b/examples/operations_common.h
new file mode 100644
index 0000000..5140595
--- /dev/null
+++ b/examples/operations_common.h
@@ -0,0 +1,33 @@
+#pragma once
+/*
+ * Copyright (C) 2024 Brett Terpstra
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLT_GP_OPERATIONS_COMMON_H
+#define BLT_GP_OPERATIONS_COMMON_H
+
+#include
+
+// Primitive set shared by the example programs: float arithmetic plus a few
+// transcendental functions. The objects are defined (not just declared) in
+// this header, so they are marked `inline`; without it every translation unit
+// that includes the header would emit its own definition of each operator and
+// the program would fail to link with duplicate symbols.
+inline blt::gp::operation_t add([](float a, float b) { return a + b; }, "add");
+inline blt::gp::operation_t sub([](float a, float b) { return a - b; }, "sub");
+inline blt::gp::operation_t mul([](float a, float b) { return a * b; }, "mul");
+// Protected division: a zero divisor yields 1 instead of inf/NaN.
+inline blt::gp::operation_t pro_div([](float a, float b) { return b == 0.0f ? 1.0f : a / b; }, "div");
+inline blt::gp::operation_t op_sin([](float a) { return std::sin(a); }, "sin");
+inline blt::gp::operation_t op_cos([](float a) { return std::cos(a); }, "cos");
+inline blt::gp::operation_t op_exp([](float a) { return std::exp(a); }, "exp");
+// Protected log: an input of exactly 0 yields 0.
+// NOTE(review): negative inputs are passed straight to std::log and produce NaN.
+inline blt::gp::operation_t op_log([](float a) { return a == 0.0f ? 0.0f : std::log(a); }, "log");
+
+#endif //BLT_GP_OPERATIONS_COMMON_H
diff --git a/examples/rice_classification.cpp b/examples/rice_classification.cpp
new file mode 100644
index 0000000..1a0a0f0
--- /dev/null
+++ b/examples/rice_classification.cpp
@@ -0,0 +1,245 @@
+/*
+ *
+ * Copyright (C) 2024 Brett Terpstra
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "operations_common.h"
+#include "blt/fs/loader.h"
+
+
+// Seed handed to the gp_program below. It is drawn from std::random_device
+// during static initialization, so every run uses a different seed; enable the
+// fixed constant instead when a reproducible run is needed.
+//static constexpr long SEED = 41912;
+static const unsigned long SEED = std::random_device()();
+
+// Class label of a rice sample. load_rice_data() assigns Cammeo to rows whose
+// last column contains "Cammeo" and Osmancik to every other row.
+enum class rice_type_t
+{
+ Cammeo,
+ Osmancik
+};
+
+// One sample of the rice data set. The seven float members are declared in the
+// same order as the comma-separated columns parsed by load_rice_data()
+// (column 0 -> area ... column 6 -> extent); `type` is the class label taken
+// from the eighth column.
+struct rice_record
+{
+ float area;
+ float perimeter;
+ float major_axis_length;
+ float minor_axis_length;
+ float eccentricity;
+ float convex_area;
+ float extent;
+ rice_type_t type;
+};
+
+// Records used to score individuals during evolution (filled by load_rice_data).
+std::vector<rice_record> fitness_cases;
+// Records held back from training (filled and shuffled by load_rice_data).
+std::vector<rice_record> testing_cases;
+
+// Run configuration for the rice classification example: initial trees of
+// size 2..6, 2 elites, crossover chance 0.9, mutation chance 0.1, no plain
+// reproduction, 50 generations and a population of 500.
+// NOTE(review): the meaning of a thread count of 0 is defined by the library
+// and is not visible here - confirm before relying on it.
+blt::gp::prog_config_t config = blt::gp::prog_config_t()
+ .set_initial_min_tree_size(2)
+ .set_initial_max_tree_size(6)
+ .set_elite_count(2)
+ .set_crossover_chance(0.9)
+ .set_mutation_chance(0.1)
+ .set_reproduction_chance(0)
+ .set_max_generations(50)
+ .set_pop_size(500)
+ .set_thread_count(0);
+
+// The program is constructed from the type system, the seed and the
+// configuration above; the operators defined below reach its random number
+// generator through program.get_random().
+blt::gp::type_provider type_system;
+blt::gp::gp_program program{type_system, SEED, config};
+
+// Random float constant terminal, flagged via set_ephemeral(). The value is
+// drawn from the program's generator with bounds -32000 and 32000.
+auto lit = blt::gp::operation_t([]() {
+    auto& rng = program.get_random();
+    return rng.get_float(-32000.0f, 32000.0f);
+}, "lit").set_ephemeral();
+
+// Feature terminals: each one returns a single member of the rice_record it is
+// given, under the same name as that member.
+blt::gp::operation_t op_area([](const rice_record& record) { return record.area; }, "area");
+blt::gp::operation_t op_perimeter([](const rice_record& record) { return record.perimeter; }, "perimeter");
+blt::gp::operation_t op_major_axis_length([](const rice_record& record) { return record.major_axis_length; }, "major_axis_length");
+blt::gp::operation_t op_minor_axis_length([](const rice_record& record) { return record.minor_axis_length; }, "minor_axis_length");
+blt::gp::operation_t op_eccentricity([](const rice_record& record) { return record.eccentricity; }, "eccentricity");
+blt::gp::operation_t op_convex_area([](const rice_record& record) { return record.convex_area; }, "convex_area");
+blt::gp::operation_t op_extent([](const rice_record& record) { return record.extent; }, "extent");
+
+// Fitness for the two-class rice problem.
+//
+// The previous body was copied from the symbolic regression example and read
+// `fitness_case.y`, a member rice_record does not have. Instead, every training
+// record is evaluated through the tree and the sign of the resulting float is
+// taken as the prediction:
+//     value >= 0  -> Cammeo
+//     value <  0  -> Osmancik
+// A NaN result satisfies neither comparison and is therefore always counted as
+// a misclassification.
+//
+// raw/standardized fitness is the number of misclassified records (lower is
+// better), adjusted fitness is 1 / (1 + standardized) (higher is better), and
+// `hits` counts the correctly classified records. Returns true once every
+// training record is classified correctly.
+constexpr auto fitness_function = [](blt::gp::tree_t& current_tree, blt::gp::fitness_t& fitness, blt::size_t) {
+    for (auto& fitness_case : fitness_cases)
+    {
+        const auto value = current_tree.get_evaluation_value<float>(&fitness_case);
+        const bool correct = fitness_case.type == rice_type_t::Cammeo ? value >= 0.0f : value < 0.0f;
+        if (correct)
+            fitness.hits++;
+        else
+            fitness.raw_fitness += 1.0;
+    }
+    fitness.standardized_fitness = fitness.raw_fitness;
+    fitness.adjusted_fitness = (1.0 / (1.0 + fitness.standardized_fitness));
+    return static_cast<blt::size_t>(fitness.hits) == fitness_cases.size();
+};
+
+/**
+ * Reads the rice data set from an ARFF file and fills the global
+ * `fitness_cases` (training) and `testing_cases` vectors.
+ *
+ * Every line up to and including the one containing "@DATA" is treated as
+ * header. Each following line is split on ',' into seven float features and a
+ * class name; lines with fewer than eight fields (blank lines, stray comments)
+ * are skipped. A third of the parsed records is drawn at random into the
+ * training set, the rest is shuffled into the testing set.
+ *
+ * If the file has no "@DATA" line an error is logged and both vectors are left
+ * untouched. std::stof still throws on a field that is not a number.
+ *
+ * @param rice_file_path path of the .arff file to load.
+ */
+void load_rice_data(std::string_view rice_file_path)
+{
+    auto rice_file_data = blt::fs::getLinesFromFile(rice_file_path);
+
+    // Locate the data section without running past the end of the file.
+    size_t index = 0;
+    while (index < rice_file_data.size() && !blt::string::contains(rice_file_data[index], "@DATA"))
+        ++index;
+    if (index == rice_file_data.size())
+    {
+        BLT_ERROR("No @DATA section found in rice data file");
+        return;
+    }
+    ++index; // first line after the marker
+
+    // Records are collected per class so the training split can sample both.
+    std::vector<rice_record> c;
+    std::vector<rice_record> o;
+    for (std::string_view v : blt::itr_offset(rice_file_data, index))
+    {
+        auto data = blt::string::split(v, ',');
+        // 7 features + class name; anything shorter is not a data row.
+        if (data.size() < 8)
+            continue;
+        rice_record r{std::stof(data[0]), std::stof(data[1]), std::stof(data[2]), std::stof(data[3]), std::stof(data[4]), std::stof(data[5]),
+                      std::stof(data[6])};
+        if (blt::string::contains(data[7], "Cammeo"))
+        {
+            r.type = rice_type_t::Cammeo;
+            c.push_back(r);
+        } else
+        {
+            r.type = rice_type_t::Osmancik;
+            o.push_back(r);
+        }
+    }
+
+    blt::size_t total_records = c.size() + o.size();
+    blt::size_t training_size = total_records / 3;
+    auto& random = program.get_random();
+    for (blt::size_t i = 0; i < training_size; i++)
+    {
+        // Pick the class by coin flip. If that class has already been used up,
+        // fall back to the other one: training_size < total_records guarantees
+        // that at least one of the two still has records.
+        auto* vec = random.choice() ? &c : &o;
+        if (vec->empty())
+            vec = (vec == &c) ? &o : &c;
+        auto pos = random.get_i64(0, static_cast<blt::i64>(vec->size()));
+        fitness_cases.push_back((*vec)[pos]);
+        vec->erase(vec->begin() + pos);
+    }
+
+    // Whatever was not drawn for training becomes the testing set.
+    testing_cases.insert(testing_cases.end(), c.begin(), c.end());
+    testing_cases.insert(testing_cases.end(), o.begin(), o.end());
+    std::shuffle(testing_cases.begin(), testing_cases.end(), program.get_random());
+}
+
+/**
+ * Entry point of the rice classification example.
+ *
+ * Parses the required -f/--file argument, loads the data set, registers the
+ * float type and the operator set, then runs the generation loop until the
+ * program reports termination and prints the three best individuals together
+ * with the population statistics.
+ *
+ * The terminal set consists of the seven rice feature accessors; the `op_x`
+ * terminal of the symbolic regression example is not defined in this file.
+ *
+ * NOTE(review): testing_cases is filled by load_rice_data() but is not yet
+ * evaluated anywhere in this function.
+ *
+ * @return 0 on success, 1 if no training records could be loaded.
+ */
+int main(int argc, const char** argv)
+{
+    blt::arg_parse parser;
+    parser.addArgument(blt::arg_builder{"-f", "--file"}.setHelp("File for rice data. Should be in .arff format.").setRequired().build());
+
+    auto args = parser.parse_args(argc, argv);
+
+    auto rice_file_path = args.get<std::string>("-f");
+
+    BLT_INFO("Starting BLT-GP Rice Classification Example");
+    BLT_START_INTERVAL("Rice Classification", "Main");
+    BLT_DEBUG("Setup Fitness cases");
+    load_rice_data(rice_file_path);
+
+    // Evolving against an empty training set is meaningless: the fitness
+    // function would report every individual as a perfect solution.
+    if (fitness_cases.empty())
+    {
+        BLT_ERROR("No training records could be loaded from the rice data file");
+        return 1;
+    }
+
+    BLT_DEBUG("Setup Types and Operators");
+    type_system.register_type<float>();
+
+    blt::gp::operator_builder builder{type_system};
+    program.set_operations(builder.build(add, sub, mul, pro_div, op_sin, op_cos, op_exp, op_log, lit, op_area, op_perimeter,
+                                         op_major_axis_length, op_minor_axis_length, op_eccentricity, op_convex_area, op_extent));
+
+    BLT_DEBUG("Generate Initial Population");
+    auto sel = blt::gp::select_tournament_t{};
+    program.generate_population(type_system.get_type<float>().id(), fitness_function, sel, sel, sel);
+
+    BLT_DEBUG("Begin Generation Loop");
+    while (!program.should_terminate())
+    {
+        BLT_TRACE("------------{Begin Generation %ld}------------", program.get_current_generation());
+        BLT_TRACE("Creating next generation");
+
+#ifdef BLT_TRACK_ALLOCATIONS
+        auto gen_alloc = blt::gp::tracker.start_measurement();
+#endif
+
+        BLT_START_INTERVAL("Rice Classification", "Gen");
+        program.create_next_generation();
+        BLT_END_INTERVAL("Rice Classification", "Gen");
+
+#ifdef BLT_TRACK_ALLOCATIONS
+        blt::gp::tracker.stop_measurement(gen_alloc);
+        BLT_TRACE("Generation Allocated %ld times with a total of %s", gen_alloc.getAllocationDifference(),
+                  blt::byte_convert_t(gen_alloc.getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str());
+        auto fitness_alloc = blt::gp::tracker.start_measurement();
+#endif
+
+        BLT_TRACE("Move to next generation");
+        BLT_START_INTERVAL("Rice Classification", "Fitness");
+        program.next_generation();
+        BLT_TRACE("Evaluate Fitness");
+        program.evaluate_fitness();
+        BLT_END_INTERVAL("Rice Classification", "Fitness");
+
+#ifdef BLT_TRACK_ALLOCATIONS
+        blt::gp::tracker.stop_measurement(fitness_alloc);
+        BLT_TRACE("Fitness Allocated %ld times with a total of %s", fitness_alloc.getAllocationDifference(),
+                  blt::byte_convert_t(fitness_alloc.getAllocatedByteDifference()).convert_to_nearest_type().to_pretty_string().c_str());
+#endif
+
+        BLT_TRACE("----------------------------------------------");
+        std::cout << std::endl;
+    }
+
+    BLT_END_INTERVAL("Rice Classification", "Main");
+
+    auto best = program.get_best_individuals<3>();
+
+    BLT_INFO("Best approximations:");
+    for (auto& i_ref : best)
+    {
+        auto& i = i_ref.get();
+        BLT_DEBUG("Fitness: %lf, stand: %lf, raw: %lf", i.fitness.adjusted_fitness, i.fitness.standardized_fitness, i.fitness.raw_fitness);
+        i.tree.print(program, std::cout);
+        std::cout << "\n";
+    }
+    auto& stats = program.get_population_stats();
+    BLT_INFO("Stats:");
+    BLT_INFO("Average fitness: %lf", stats.average_fitness.load());
+    BLT_INFO("Best fitness: %lf", stats.best_fitness.load());
+    BLT_INFO("Worst fitness: %lf", stats.worst_fitness.load());
+    BLT_INFO("Overall fitness: %lf", stats.overall_fitness.load());
+    // TODO: make stats helper
+
+    BLT_PRINT_PROFILE("Rice Classification", blt::PRINT_CYCLES | blt::PRINT_THREAD | blt::PRINT_WALL);
+
+#ifdef BLT_TRACK_ALLOCATIONS
+    BLT_TRACE("Total Allocations: %ld times with a total of %s", blt::gp::tracker.getAllocations(),
+              blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str());
+#endif
+
+    return 0;
+}
\ No newline at end of file
diff --git a/examples/symbolic_regression.cpp b/examples/symbolic_regression.cpp
index 7bb1160..3e2d24c 100644
--- a/examples/symbolic_regression.cpp
+++ b/examples/symbolic_regression.cpp
@@ -21,6 +21,7 @@
#include
#include
#include
+#include "operations_common.h"
//static constexpr long SEED = 41912;
static const unsigned long SEED = std::random_device()();
@@ -40,21 +41,12 @@ blt::gp::prog_config_t config = blt::gp::prog_config_t()
.set_mutation_chance(0.1)
.set_reproduction_chance(0)
.set_max_generations(50)
- .set_pop_size(5000)
+ .set_pop_size(500)
.set_thread_count(0);
blt::gp::type_provider type_system;
blt::gp::gp_program program{type_system, SEED, config};
-blt::gp::operation_t add([](float a, float b) { return a + b; }, "add");
-blt::gp::operation_t sub([](float a, float b) { return a - b; }, "sub");
-blt::gp::operation_t mul([](float a, float b) { return a * b; }, "mul");
-blt::gp::operation_t pro_div([](float a, float b) { return b == 0.0f ? 1.0f : a / b; }, "div");
-blt::gp::operation_t op_sin([](float a) { return std::sin(a); }, "sin");
-blt::gp::operation_t op_cos([](float a) { return std::cos(a); }, "cos");
-blt::gp::operation_t op_exp([](float a) { return std::exp(a); }, "exp");
-blt::gp::operation_t op_log([](float a) { return a == 0.0f ? 0.0f : std::log(a); }, "log");
-
auto lit = blt::gp::operation_t([]() {
return program.get_random().get_float(-320.0f, 320.0f);
}, "lit").set_ephemeral();
@@ -107,7 +99,7 @@ int main()
program.set_operations(builder.build(add, sub, mul, pro_div, op_sin, op_cos, op_exp, op_log, lit, op_x));
BLT_DEBUG("Generate Initial Population");
- auto sel = blt::gp::select_fitness_proportionate_t{};
+ auto sel = blt::gp::select_tournament_t{};
program.generate_population(type_system.get_type().id(), fitness_function, sel, sel, sel);
BLT_DEBUG("Begin Generation Loop");
@@ -174,14 +166,6 @@ int main()
BLT_TRACE("Total Allocations: %ld times with a total of %s", blt::gp::tracker.getAllocations(),
blt::byte_convert_t(blt::gp::tracker.getAllocatedBytes()).convert_to_nearest_type().to_pretty_string().c_str());
#endif
-
-// BLT_TRACE("Allocations:");
-// auto h = static_cast(blt::gp::hello.load());
-// auto u = static_cast(blt::gp::unhello.load());
-// BLT_TRACE("Allocated: %ld", h);
-// BLT_TRACE("Deallocated: %ld", u);
-// BLT_TRACE("Ratio: %lf Difference: %ld", static_cast(h) / static_cast(u), std::abs(h - u));
-// BLT_TRACE("Total Allocated Bytes: %ld", blt::gp::hello_bytes.load());
return 0;
}
\ No newline at end of file
diff --git a/include/blt/gp/random.h b/include/blt/gp/random.h
index eecc3bd..4875a57 100644
--- a/include/blt/gp/random.h
+++ b/include/blt/gp/random.h
@@ -25,93 +25,7 @@
namespace blt::gp
{
-#define BLT_RANDOM_FUNCTION blt::random::murmur_random64
-#define BLT_RANDOM_FLOAT blt::random::murmur_float64
-#define BLT_RANDOM_DOUBLE blt::random::murmur_double64
-
- class random_t
- {
- public:
- explicit random_t(blt::u64 seed): seed(seed)
- {}
-
- void set_seed(blt::u64 s)
- {
- seed = s;
- }
-
- float get_float()
- {
- return BLT_RANDOM_FLOAT(seed);
- }
-
- double get_double()
- {
- return BLT_RANDOM_DOUBLE(seed);
- }
-
- // [min, max)
- double get_double(double min, double max)
- {
- return BLT_RANDOM_FUNCTION(seed, min, max);
- }
-
- // [min, max)
- float get_float(float min, float max)
- {
- return BLT_RANDOM_FUNCTION(seed, min, max);
- }
-
- i32 get_i32(i32 min, i32 max)
- {
- return BLT_RANDOM_FUNCTION(seed, min, max);
- }
-
- u32 get_u32(u32 min, u32 max)
- {
- return BLT_RANDOM_FUNCTION(seed, min, max);
- }
-
- i64 get_i64(i64 min, i64 max)
- {
- return BLT_RANDOM_FUNCTION(seed, min, max);
- }
-
- u64 get_u64(u64 min, u64 max)
- {
- return BLT_RANDOM_FUNCTION(seed, min, max);
- }
-
- blt::size_t get_size_t(blt::size_t min, blt::size_t max)
- {
- return BLT_RANDOM_FUNCTION(seed, min, max);
- }
-
- bool choice()
- {
- return BLT_RANDOM_DOUBLE(seed) < 0.5;
- }
-
- bool choice(double cutoff)
- {
- return BLT_RANDOM_DOUBLE(seed) <= cutoff;
- }
-
- template
- auto& select(Container& container)
- {
- return container[get_u64(0, container.size())];
- }
-
- template
- const auto& select(const Container& container)
- {
- return container[get_u64(0, container.size())];
- }
-
- private:
- blt::u64 seed;
- };
+ // Random number generator type of blt::gp: an alias of blt::random::random_t.
+ using random_t = blt::random::random_t;
}
diff --git a/lib/blt b/lib/blt
index 6632d04..b6354be 160000
--- a/lib/blt
+++ b/lib/blt
@@ -1 +1 @@
-Subproject commit 6632d045286b42d257eb3783e96256c13b588186
+Subproject commit b6354bed7846078e863767ce5afc7daa53b93988