#pragma once /* * Copyright (C) 2024 Brett Terpstra * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include "blt/std/hashmap.h" #include "blt/std/memory.h" #ifndef FINALPROJECT_RUNNER_AGGREGATION_H #define FINALPROJECT_RUNNER_AGGREGATION_H #include #include #include #include #include #include #include #include template static inline void fill_value(T& v, const std::string& str) { try { if constexpr (std::is_floating_point_v) { v = std::stod(str); } else if constexpr (std::is_integral_v) { v = std::stoi(str); } else { static_assert("Unsupported type!"); } } catch (const std::exception& e) { BLT_ERROR("Failed to convert value from string '%s' to type '%s' what(): %s", str.c_str(), blt::type_string().c_str(), e.what()); } } /** * Structure used to store information loaded from the .stt file */ struct stt_record { public: stt_record() = default; int gen, sub; double mean_fitness; double best_fitness, worse_fitness, mean_tree_size, mean_tree_depth; int best_tree_size, best_tree_depth, worse_tree_size, worse_tree_depth; double mean_fitness_run; double best_fitness_run, worse_fitness_run, mean_tree_size_run, mean_tree_depth_run; int best_tree_size_run, best_tree_depth_run, worse_tree_size_run, worse_tree_depth_run; stt_record& operator+=(const stt_record& r); stt_record& operator/=(int i); static stt_record from_string_array(int generation, size_t& idx, blt::span values); }; /** * Structure used to store information loaded from the .fn file */ struct fn_record { // real value = 'cammeo' predicted value = 'cammeo' blt::size_t cc = 0; // real value = 'cammeo' predicted value = 'osmancik' blt::size_t co = 0; // real value = 'osmancik' predicted value = 'osmancik' blt::size_t oo = 0; // real value = 'osmancik' predicted value = 'cammeo' blt::size_t oc = 0; double fitness = 0; // hits from the training data, kinda useless blt::size_t hits = 0; }; struct runs_stt_data { // per generation (map stores from gen -> list of rows (records)) blt::hashmap_t> averages; // per RUN generation size std::vector runs_generation_size; // count of the number of generations that use the same number of generations (used for calculating mode) exists [0, largest_generation] blt::scoped_buffer generations_size_count; // largest number of generations from all runs int largest_generation = 0; // total number of generations across all runs (r1...rn) int total_generations = 0; // total / runs int generations_average = 0; // # of run lengths value is `data.generations_size_count[data.mode_generation]` int generations_mode = 0; // the generation that is the mode int mode_generation = 0; }; struct runs_fn_data { std::vector runs; // index of the best recorded run blt::size_t best = 0; // total number of hits from all runs blt::size_t total_hits = 0; // total number of rice testing data blt::size_t total_tests = 0; // hits / tests for all runs blt::size_t average_hits = 0; blt::size_t average_tests = 0; // percent of valid tests double average_valid = 0; double total_fitness = 0; double average_fitness = 0; }; void process_stt_file(runs_stt_data& data, int& max_gen, std::string_view file); runs_stt_data get_per_generation_averages(const std::string& outfile, int runs); // in case you are wondering why all these functions are using template parameters, it is so that I can pass BLT_?*_STREAM into them // allowing for output to stdout template inline void write_record(T& writer, const stt_record& r) { writer << r.gen << '\t'; writer << r.sub << '\t'; writer << r.mean_fitness << '\t'; writer << r.best_fitness << '\t'; writer << r.worse_fitness << '\t'; writer << r.mean_tree_size << '\t'; writer << r.mean_tree_depth << '\t'; writer << r.best_tree_size << '\t'; writer << r.best_tree_depth << '\t'; writer << r.worse_tree_size << '\t'; writer << r.worse_tree_depth << '\t'; writer << r.mean_fitness_run << '\t'; writer << r.best_fitness_run << '\t'; writer << r.worse_fitness_run << '\t'; writer << r.mean_tree_size_run << '\t'; writer << r.mean_tree_depth_run << '\t'; writer << r.best_tree_size_run << '\t'; writer << r.best_tree_depth_run << '\t'; writer << r.worse_tree_size_run << '\t'; writer << r.worse_tree_depth_run << '\n'; } template inline void write_data_values(T& writer, const std::string& function, const bool end, FUNC func, Args... args) { const char BASE = 'a'; for (int i = 0; i < 20; i++) { char c = static_cast(BASE + i); writer << function; func(writer, c, args...); if (i != 19 || !end) writer << '\t'; // spacing tab if (i == 19 && !end) writer << '\t'; } } // writes functions operating on runs per generation template inline void write_func_gens(T& writer, const char c, const int current_gen, runs_stt_data& data, const blt::size_t gen_offset, const blt::size_t offset) { auto runs = data.averages[current_gen].size(); for (size_t j = 0; j < runs; j++) { writer << c << ((gen_offset) + (offset + j)); if (j != runs - 1) writer << ", "; else writer << ')'; } } // operates on the aggregated data created by the above function giving totals for the entire population template inline void write_func_pop(T& writer, const char c, const int gen_size, const blt::size_t offset) { for (int j = 0; j < gen_size; j++) { // get the position of our aggregated data, which the offset contains writer << c << (offset - gen_size - 1 + j); if (j != gen_size - 1) writer << ", "; else writer << ')'; } } void write_averaged_output(const std::string& writefile, const runs_stt_data& data, const std::vector& generation_averages); void write_full_output(const std::string& writefile, const runs_stt_data& data, const std::vector>& ordered_records); void process_stt(const std::string& outfile, const std::string& writefile, const std::string& writefile_run, int runs); void process_fn(const std::string& outfile, const std::string& writefile, int runs); void process_files(const std::string& outfile, const std::string writefile, int runs); #endif //FINALPROJECT_RUNNER_AGGREGATION_H