From 2318f0edcf02dc644b764a18c1dfa10b57d0fa17 Mon Sep 17 00:00:00 2001
From: Brett
Date: Mon, 4 Nov 2024 16:45:46 -0500
Subject: [PATCH] file

---
 CMakeLists.txt         |   2 +-
 default.nix            |  50 ++++++++++
 include/assign3/file.h | 109 ++++++++++++++++++++++
 lib/blt-with-graphics  |   2 +-
 src/file.cpp           | 206 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 367 insertions(+), 2 deletions(-)
 create mode 100644 default.nix
 create mode 100644 include/assign3/file.h
 create mode 100644 src/file.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ccc2bdd..ee5888e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.25)
-project(COSC-4P80-Assignment-3 VERSION 0.0.2)
+project(COSC-4P80-Assignment-3 VERSION 0.0.3)
 
 option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)
 option(ENABLE_UBSAN "Enable the ub sanitizer" OFF)
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..1c2c439
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,50 @@
+{ pkgs ? (import <nixpkgs> {
+    config.allowUnfree = true;
+    config.segger-jlink.acceptLicense = true;
+}), ... }:
+pkgs.mkShell
+{
+    buildInputs = with pkgs; [
+        cmake
+        gcc
+        clang
+        emscripten
+        ninja
+        jetbrains.clion
+        renderdoc
+        valgrind
+    ];
+    propagatedBuildInputs = with pkgs; [
+        xorg.libX11
+        xorg.libX11.dev
+        xorg.libXcursor
+        xorg.libXcursor.dev
+        xorg.libXext
+        xorg.libXext.dev
+        xorg.libXinerama
+        xorg.libXinerama.dev
+        xorg.libXrandr
+        xorg.libXrandr.dev
+        xorg.libXrender
+        xorg.libXrender.dev
+        xorg.libxcb
+        xorg.libxcb.dev
+        xorg.libXi
+        xorg.libXi.dev
+        harfbuzz
+        harfbuzz.dev
+        zlib
+        zlib.dev
+        bzip2
+        bzip2.dev
+        pngpp
+        brotli
+        brotli.dev
+        pulseaudio.dev
+        git
+        libGL
+        libGL.dev
+        glfw
+    ];
+    LD_LIBRARY_PATH="/run/opengl-driver/lib:/run/opengl-driver-32/lib";
+}
diff --git a/include/assign3/file.h b/include/assign3/file.h
new file mode 100644
index 0000000..3dcd6d7
--- /dev/null
+++ b/include/assign3/file.h
@@ -0,0 +1,109 @@
+#pragma once
+/*
+ * Copyright (C) 2024 Brett Terpstra
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef COSC_4P80_ASSIGNMENT_3_FILE_H
+#define COSC_4P80_ASSIGNMENT_3_FILE_H
+
+#include <vector>
+#include <string>
+#include <string_view>
+#include <blt/std/types.h>
+#include "blt/std/assert.h"
+
+namespace assign3
+{
+    using Scalar = float;
+
+    struct data_t
+    {
+        bool is_bad = false;
+        std::vector<Scalar> bins;
+
+        [[nodiscard]] data_t normalize() const;
+
+        [[nodiscard]] data_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const;
+    };
+
+    struct data_file_t
+    {
+    public:
+        std::vector<data_t> data_points;
+
+        [[nodiscard]] data_file_t normalize() const;
+
+        [[nodiscard]] data_file_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const;
+
+        data_file_t& operator+=(const data_file_t& o);
+
+        friend data_file_t operator+(const data_file_t& a, const data_file_t& b);
+
+        static std::vector<data_file_t> load_data_files_from_path(std::string_view path);
+
+    private:
+        static std::vector<std::string> get_data_file_list(std::string_view path);
+
+        static std::vector<data_file_t> load_data_files(const std::vector<std::string>& files);
+    };
+
+    struct partitioned_dataset_t
+    {
+    public:
+        explicit partitioned_dataset_t(std::vector<data_file_t> groups):
+                groups(std::move(groups)), bins(this->groups.begin()->data_points.begin()->bins.size())
+        {}
+
+        [[nodiscard]] const std::vector<data_file_t>& getGroups() const
+        {
+            return groups;
+        }
+
+        [[nodiscard]] blt::size_t bin_size() const
+        {
+            return bins;
+        }
+
+    private:
+        std::vector<data_file_t> groups;
+        blt::size_t bins;
+    };
+
+    struct dataset_partitioner
+    {
+    public:
+        explicit dataset_partitioner(const data_file_t& file)
+        {
+            with(file);
+        }
+
+        dataset_partitioner& with(const data_file_t& data)
+        {
+            BLT_ASSERT(files.empty() || data.data_points.begin()->bins.size() == files.begin()->data_points.begin()->bins.size());
+            files.push_back(data);
+            return *this;
+        }
+
+        [[nodiscard]] partitioned_dataset_t partition(blt::size_t groups) const;
+
+    private:
+        std::vector<data_file_t> files;
+    };
+
+    void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data);
+}
+
+#endif //COSC_4P80_ASSIGNMENT_3_FILE_H
diff --git a/lib/blt-with-graphics b/lib/blt-with-graphics
index 7860470..a933928 160000
--- a/lib/blt-with-graphics
+++ b/lib/blt-with-graphics
@@ -1 +1 @@
-Subproject commit 78604708fa52beab06efac5f49fe60f6df2bd588
+Subproject commit a9339280a05f432e41005c2a964767cb37f692a7
diff --git a/src/file.cpp b/src/file.cpp
new file mode 100644
index 0000000..85455b9
--- /dev/null
+++ b/src/file.cpp
@@ -0,0 +1,206 @@
+/*
+ *
+ * Copyright (C) 2024 Brett Terpstra
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+#include <assign3/file.h>
+#include <filesystem>
+#include <fstream>
+#include <algorithm>
+#include <random>
+#include <cmath>
+#include <blt/std/string.h>
+#include <blt/std/random.h>
+#include <blt/fs/loader.h>
+#include "blt/iterator/enumerate.h"
+
+namespace assign3
+{
+    std::vector<std::string> data_file_t::get_data_file_list(std::string_view path)
+    {
+        std::vector<std::string> files;
+
+        for (const auto& file : std::filesystem::recursive_directory_iterator(path))
+        {
+            if (file.is_directory())
+                continue;
+            auto file_path = file.path().string();
+            if (blt::string::ends_with(file_path, ".out"))
+                files.push_back(blt::fs::getFile(file_path));
+        }
+
+        return files;
+    }
+
+    std::vector<data_file_t> data_file_t::load_data_files(const std::vector<std::string>& files)
+    {
+        std::vector<data_file_t> loaded_data;
+
+        // load all files
+        for (auto file : files)
+        {
+            // we only use unix line endings here...
+            blt::string::replaceAll(file, "\r", "");
+            auto lines = blt::string::split(file, "\n");
+            auto line_it = lines.begin();
+            auto meta = blt::string::split(*line_it, ' ');
+
+            // load data inside files
+            data_file_t data;
+            data.data_points.reserve(std::stoll(meta[0]));
+            auto bin_count = std::stoul(meta[1]);
+
+            for (++line_it; line_it != lines.end(); ++line_it)
+            {
+                auto line_data_meta = blt::string::split(*line_it, ' ');
+                if (line_data_meta.size() != bin_count + 1)
+                    continue;
+                auto line_data_it = line_data_meta.begin();
+
+                // load bins
+                data_t line_data;
+                line_data.is_bad = std::stoi(*line_data_it) == 1;
+                line_data.bins.reserve(bin_count);
+
+                for (++line_data_it; line_data_it != line_data_meta.end(); ++line_data_it)
+                    line_data.bins.push_back(std::stof(*line_data_it));
+
+                data.data_points.push_back(line_data);
+            }
+
+            loaded_data.push_back(data);
+        }
+
+        return loaded_data;
+    }
+
+    std::vector<data_file_t> data_file_t::load_data_files_from_path(std::string_view path)
+    {
+        return load_data_files(get_data_file_list(path));
+    }
+
+    data_t data_t::with_padding(blt::size_t desired_size, Scalar padding_value) const
+    {
+        data_t data = *this;
+        auto amount_to_add = static_cast<blt::ptrdiff_t>(desired_size) - static_cast<blt::ptrdiff_t>(data.bins.size());
+        for (blt::ptrdiff_t i = 0; i < amount_to_add; i++)
+            data.bins.push_back(padding_value);
+        return data;
+    }
+
+    data_t data_t::normalize() const
+    {
+        data_t data = *this;
+
+        Scalar total = 0;
+        for (auto v : data.bins)
+            total += v * v;
+        Scalar mag = std::sqrt(total);
+        for (auto& v : data.bins)
+            v /= mag;
+        return data;
+    }
+
+    data_file_t data_file_t::normalize() const
+    {
+        auto copy = *this;
+
+        for (auto& v : copy.data_points)
+            v = v.normalize();
+
+        return copy;
+    }
+
+    data_file_t data_file_t::with_padding(blt::size_t desired_size, Scalar padding_value) const
+    {
+        auto copy = *this;
+
+        for (auto& v : copy.data_points)
+            v = v.with_padding(desired_size, padding_value);
+
+        return copy;
+    }
+
+    data_file_t& data_file_t::operator+=(const data_file_t& o)
+    {
+        data_points.insert(data_points.end(), o.data_points.begin(), o.data_points.end());
+        return *this;
+    }
+
+    data_file_t operator+(const data_file_t& a, const data_file_t& b)
+    {
+        data_file_t file = a;
+        file.data_points.insert(file.data_points.end(), b.data_points.begin(), b.data_points.end());
+        return file;
+    }
+
+    partitioned_dataset_t dataset_partitioner::partition(blt::size_t groups) const
+    {
+        std::vector<data_t> good_data;
+        std::vector<data_t> bad_data;
+
+        for (const auto& f : files)
+        {
+            for (const auto& v : f.data_points)
+            {
+                if (v.is_bad)
+                    bad_data.push_back(v);
+                else
+                    good_data.push_back(v);
+            }
+        }
+
+        blt::random::random_t rand{std::random_device{}()};
+
+        std::shuffle(good_data.begin(), good_data.end(), rand);
+        std::shuffle(bad_data.begin(), bad_data.end(), rand);
+
+        std::vector<data_file_t> grouped_data;
+        grouped_data.resize(groups);
+
+        blt::size_t insert_group = 0;
+        for (const auto& good : good_data)
+            grouped_data[insert_group++ % groups].data_points.push_back(good);
+
+        for (const auto& bad : bad_data)
+            grouped_data[insert_group++ % groups].data_points.push_back(bad);
+
+        return partitioned_dataset_t{std::move(grouped_data)};
+    }
+
+    void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data)
+    {
+        std::ofstream stream{file};
+        stream << "epoch,";
+        for (auto [i, d] : blt::enumerate(data))
+        {
+            stream << d.first;
+            if (i != data.size() - 1)
+                stream << ',';
+        }
+        stream << '\n';
+        for (blt::size_t i = 0; i < data.begin()->second.size(); i++)
+        {
+            stream << i << ',';
+            for (auto [j, d] : blt::enumerate(data))
+            {
+                stream << d.second[i];
+                if (j != data.size() - 1)
+                    stream << ',';
+            }
+            stream << '\n';
+        }
+    }
+}
\ No newline at end of file