diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml index bc72265..e2dcaed 100644 --- a/.idea/copyright/profiles_settings.xml +++ b/.idea/copyright/profiles_settings.xml @@ -1,5 +1,5 @@ - + diff --git a/CMakeLists.txt b/CMakeLists.txt index 159f426..c61274b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(COSC-4P80-Assignment-2 VERSION 0.0.11) +project(COSC-4P80-Assignment-2 VERSION 0.1.0) option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF) option(ENABLE_UBSAN "Enable the ub sanitizer" OFF) diff --git a/include/assign2/common.h b/include/assign2/common.h index 2633471..61aa8f2 100644 --- a/include/assign2/common.h +++ b/include/assign2/common.h @@ -21,7 +21,6 @@ #include #include -#include #ifdef BLT_USE_GRAPHICS @@ -49,17 +48,6 @@ namespace assign2 return std::cout; } - struct data_t - { - bool is_bad = false; - std::vector bins; - }; - - struct data_file_t - { - std::vector data_points; - }; - struct error_data_t { Scalar error; @@ -164,111 +152,6 @@ namespace assign2 std::vector data; }; - inline std::vector get_data_files(std::string_view path) - { - std::vector files; - - for (const auto& file : std::filesystem::recursive_directory_iterator(path)) - { - if (file.is_directory()) - continue; - auto file_path = file.path().string(); - if (blt::string::ends_with(file_path, ".out")) - files.push_back(blt::fs::getFile(file_path)); - } - - return files; - } - - inline std::vector load_data_files(const std::vector& files) - { - std::vector loaded_data; - - // load all file - for (auto file : files) - { - // we only use unix line endings here... - blt::string::replaceAll(file, "\r", ""); - auto lines = blt::string::split(file, "\n"); - auto line_it = lines.begin(); - auto meta = blt::string::split(*line_it, ' '); - - // load data inside files - data_file_t data; - data.data_points.reserve(std::stoll(meta[0])); - auto bin_count = std::stoul(meta[1]); - - for (++line_it; line_it != lines.end(); ++line_it) - { - auto line_data_meta = blt::string::split(*line_it, ' '); - if (line_data_meta.size() != bin_count + 1) - continue; - auto line_data_it = line_data_meta.begin(); - - // load bins - data_t line_data; - line_data.is_bad = std::stoi(*line_data_it) == 1; - line_data.bins.reserve(bin_count); - Scalar total = 0; - Scalar min = 1000; - Scalar max = 0; - for (++line_data_it; line_data_it != line_data_meta.end(); ++line_data_it) - { - auto v = std::stof(*line_data_it); - if (v > max) - max = v; - if (v < min) - min = v; - total += v * v; - line_data.bins.push_back(v); - } - - // normalize vector. - total = std::sqrt(total); -// -// for (auto& v : line_data.bins) -// { -// v /= total; -// v *= 2.71828; -// v -= 2.71828 / 2; -// } -// -// if (line_data.bins.size() == 32) -// print_vec(line_data.bins) << std::endl; - - data.data_points.push_back(line_data); - } - - loaded_data.push_back(data); - } - - return loaded_data; - } - - inline void save_as_csv(const std::string& file, const std::vector>>& data) - { - std::ofstream stream{file}; - stream << "epoch,"; - for (auto [i, d] : blt::enumerate(data)) - { - stream << d.first; - if (i != data.size() - 1) - stream << ','; - } - stream << '\n'; - for (blt::size_t i = 0; i < data.begin()->second.size(); i++) - { - stream << i << ','; - for (auto [j, d] : blt::enumerate(data)) - { - stream << d.second[i]; - if (j != data.size() - 1) - stream << ','; - } - stream << '\n'; - } - } - inline bool is_thinks_bad(const std::vector& out) { return out[0] < out[1]; diff --git a/include/assign2/file.h b/include/assign2/file.h new file mode 100644 index 0000000..1e6d3cb --- /dev/null +++ b/include/assign2/file.h @@ -0,0 +1,54 @@ +#pragma once +/* + * Copyright (C) 2024 Brett Terpstra + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef COSC_4P80_ASSIGNMENT_2_FILE_H +#define COSC_4P80_ASSIGNMENT_2_FILE_H + +#include + + +namespace assign2 +{ + + struct data_t + { + bool is_bad = false; + std::vector bins; + + [[nodiscard]] data_t normalize() const; + [[nodiscard]] data_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const; + }; + + struct data_file_t + { + public: + std::vector data_points; + + static std::vector load_data_files_from_path(std::string_view path); + + private: + static std::vector get_data_file_list(std::string_view path); + + static std::vector load_data_files(const std::vector& files); + }; + + void save_as_csv(const std::string& file, const std::vector>>& data); + +} + +#endif //COSC_4P80_ASSIGNMENT_2_FILE_H diff --git a/include/assign2/global_magic.h b/include/assign2/global_magic.h index 7829136..c1c6c25 100644 --- a/include/assign2/global_magic.h +++ b/include/assign2/global_magic.h @@ -25,6 +25,7 @@ #include #include #include +#include "file.h" namespace assign2 { @@ -51,24 +52,24 @@ namespace assign2 }; - inline std::vector errors_over_time; - inline std::vector error_derivative_over_time; - inline std::vector error_of_test; - inline std::vector error_of_test_derivative; + inline std::vector training_error_epochs; + inline std::vector d_training_error_epochs; - inline std::vector error_derivative_of_test; - inline std::vector correct_over_time; - inline std::vector correct_over_time_test; + inline std::vector testing_error_epochs; + inline std::vector d_testing_error_epochs; + + inline std::vector training_correct_epochs; + inline std::vector testing_correct_epochs; inline std::vector nodes; void save_error_info(const std::string& name) { - save_as_csv("network" + name + ".csv", {{"train_error", errors_over_time}, - {"train_d_error", error_derivative_over_time}, - {"test_error", error_of_test}, - {"test_d_error", error_of_test_derivative}, - {"correct_train", correct_over_time}, - {"correct_test", correct_over_time_test}}); + save_as_csv("network" + name + ".csv", {{"train_error", training_error_epochs}, + {"train_d_error", d_training_error_epochs}, + {"test_error", testing_error_epochs}, + {"test_d_error", d_testing_error_epochs}, + {"correct_train", training_correct_epochs}, + {"correct_test", testing_correct_epochs}}); } } diff --git a/src/file.cpp b/src/file.cpp new file mode 100644 index 0000000..b72de28 --- /dev/null +++ b/src/file.cpp @@ -0,0 +1,133 @@ +// Copyright (c) 2024. Brett Terpstra +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +#include +#include +#include +#include +#include +#include + +namespace assign2 +{ + std::vector data_file_t::get_data_file_list(std::string_view path) + { + std::vector files; + + for (const auto& file : std::filesystem::recursive_directory_iterator(path)) + { + if (file.is_directory()) + continue; + auto file_path = file.path().string(); + if (blt::string::ends_with(file_path, ".out")) + files.push_back(blt::fs::getFile(file_path)); + } + + return files; + } + + std::vector data_file_t::load_data_files(const std::vector& files) + { + std::vector loaded_data; + + // load all file + for (auto file : files) + { + // we only use unix line endings here... + blt::string::replaceAll(file, "\r", ""); + auto lines = blt::string::split(file, "\n"); + auto line_it = lines.begin(); + auto meta = blt::string::split(*line_it, ' '); + + // load data inside files + data_file_t data; + data.data_points.reserve(std::stoll(meta[0])); + auto bin_count = std::stoul(meta[1]); + + for (++line_it; line_it != lines.end(); ++line_it) + { + auto line_data_meta = blt::string::split(*line_it, ' '); + if (line_data_meta.size() != bin_count + 1) + continue; + auto line_data_it = line_data_meta.begin(); + + // load bins + data_t line_data; + line_data.is_bad = std::stoi(*line_data_it) == 1; + line_data.bins.reserve(bin_count); + + for (++line_data_it; line_data_it != line_data_meta.end(); ++line_data_it) + line_data.bins.push_back(std::stof(*line_data_it)); + + data.data_points.push_back(line_data); + } + + loaded_data.push_back(data); + } + + return loaded_data; + } + + std::vector data_file_t::load_data_files_from_path(std::string_view path) + { + return load_data_files(get_data_file_list(path)); + } + + data_t data_t::with_padding(blt::size_t desired_size, Scalar padding_value) const + { + data_t data = *this; + auto amount_to_add = static_cast(data.bins.size()) - static_cast(desired_size); + for (blt::ptrdiff_t i = 0; i < amount_to_add; i++) + data.bins.push_back(padding_value); + return data; + } + + data_t data_t::normalize() const + { + data_t data = *this; + + Scalar total = 0; + for (auto v : data.bins) + total += v * v; + Scalar mag = std::sqrt(total); + for (auto& v : data.bins) + v /= mag; + return data; + } + + void save_as_csv(const std::string& file, const std::vector>>& data) + { + std::ofstream stream{file}; + stream << "epoch,"; + for (auto [i, d] : blt::enumerate(data)) + { + stream << d.first; + if (i != data.size() - 1) + stream << ','; + } + stream << '\n'; + for (blt::size_t i = 0; i < data.begin()->second.size(); i++) + { + stream << i << ','; + for (auto [j, d] : blt::enumerate(data)) + { + stream << d.second[i]; + if (j != data.size() - 1) + stream << ','; + } + stream << '\n'; + } + } +} \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 65eba44..9f364dc 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -510,7 +511,7 @@ int main(int argc, const char** argv) std::string data_directory = blt::string::ensure_ends_with_path_separator(args.get("file")); - data_files = load_data_files(get_data_files(data_directory)); + data_files = data_file_t::load_data_files_from_path(data_directory); if (args.contains("kfold")) { @@ -586,12 +587,12 @@ int main(int argc, const char** argv) } // this is to prevent threading issues due to expanding buffers. - errors_over_time.reserve(25000); - error_derivative_over_time.reserve(25000); - correct_over_time.reserve(25000); - correct_over_time_test.reserve(25000); - error_of_test.reserve(25000); - error_of_test_derivative.reserve(25000); + training_error_epochs.reserve(25000); + d_training_error_epochs.reserve(25000); + training_correct_epochs.reserve(25000); + testing_correct_epochs.reserve(25000); + testing_error_epochs.reserve(25000); + d_testing_error_epochs.reserve(25000); #ifdef BLT_USE_GRAPHICS blt::gfx::init(blt::gfx::window_data{"Freeplay Graphics", init, update, 1440, 720}.setSyncInterval(1).setMonitor(glfwGetPrimaryMonitor())