main
Brett 2024-11-04 16:45:46 -05:00
parent 12512b4698
commit 2318f0edcf
5 changed files with 367 additions and 2 deletions

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25) cmake_minimum_required(VERSION 3.25)
project(COSC-4P80-Assignment-3 VERSION 0.0.2) project(COSC-4P80-Assignment-3 VERSION 0.0.3)
option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF) option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)
option(ENABLE_UBSAN "Enable the ub sanitizer" OFF) option(ENABLE_UBSAN "Enable the ub sanitizer" OFF)

50
default.nix Normal file
View File

@ -0,0 +1,50 @@
# Development shell for this assignment.
# Enter with `nix-shell`; provides compilers, build tools, and the X11/graphics
# libraries the graphical frontend links against.
{ pkgs ? (import <nixpkgs> {
  # jlink/clion are unfree packages; allow them for this shell only
  config.allowUnfree = true;
  config.segger-jlink.acceptLicense = true;
}), ... }:
pkgs.mkShell
{
  # build-time tools and compilers
  buildInputs = with pkgs; [
    cmake
    gcc
    clang
    emscripten
    ninja
    jetbrains.clion
    renderdoc
    valgrind
  ];
  # libraries the project (and its dependents) link against
  propagatedBuildInputs = with pkgs; [
    xorg.libX11
    xorg.libX11.dev
    xorg.libXcursor
    xorg.libXcursor.dev
    xorg.libXext
    xorg.libXext.dev
    xorg.libXinerama
    xorg.libXinerama.dev
    xorg.libXrandr
    xorg.libXrandr.dev
    xorg.libXrender
    xorg.libXrender.dev
    xorg.libxcb
    xorg.libxcb.dev
    xorg.libXi
    xorg.libXi.dev
    harfbuzz
    harfbuzz.dev
    zlib
    zlib.dev
    bzip2
    bzip2.dev
    pngpp
    brotli
    brotli.dev
    pulseaudio.dev
    git
    libGL
    libGL.dev
    glfw
  ];
  # NixOS keeps GL drivers outside the default linker path; expose them here
  LD_LIBRARY_PATH="/run/opengl-driver/lib:/run/opengl-driver-32/lib";
}

109
include/assign3/file.h Normal file
View File

@ -0,0 +1,109 @@
#pragma once
/*
* Copyright (C) 2024 Brett Terpstra
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef COSC_4P80_ASSIGNMENT_3_FILE_H
#define COSC_4P80_ASSIGNMENT_3_FILE_H
#include <blt/std/types.h>
#include <vector>
#include <string>
#include <string_view>
#include "blt/std/assert.h"
namespace assign3
{
using Scalar = float;
// A single sample: one vector of bin values plus its classification flag.
struct data_t
{
    // true when the sample is labelled "bad" (first field of its line == 1)
    bool is_bad = false;
    // the sample's feature bins, one Scalar per bin
    std::vector<Scalar> bins;

    // returns a copy of this sample scaled to unit (Euclidean) length
    [[nodiscard]] data_t normalize() const;
    // returns a copy whose bin vector is padded with `padding_value`
    // up to `desired_size` entries
    [[nodiscard]] data_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const;
};
// A collection of samples loaded from one ".out" data file
// (or merged together from several via operator+ / operator+=).
struct data_file_t
{
    public:
        std::vector<data_t> data_points;

        // element-wise: returns a copy with every point normalized
        [[nodiscard]] data_file_t normalize() const;
        // element-wise: returns a copy with every point padded (see data_t::with_padding)
        [[nodiscard]] data_file_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const;

        // appends o's points onto this file's point list
        data_file_t& operator+=(const data_file_t& o);
        // concatenation of the two files' point lists
        data_file_t friend operator+(const data_file_t& a, const data_file_t& b);

        // loads and parses every ".out" file found recursively under `path`
        static std::vector<data_file_t> load_data_files_from_path(std::string_view path);
    private:
        // NOTE: returns the *contents* of each ".out" file under `path`, not the paths
        static std::vector<std::string> get_data_file_list(std::string_view path);
        // parses each raw file body returned by get_data_file_list
        static std::vector<data_file_t> load_data_files(const std::vector<std::string>& files);
};
struct partitioned_dataset_t
{
public:
explicit partitioned_dataset_t(std::vector<data_file_t> groups):
groups(std::move(groups)), bins(this->groups.begin()->data_points.begin()->bins.size())
{}
[[nodiscard]] const std::vector<data_file_t>& getGroups() const
{
return groups;
}
[[nodiscard]] blt::size_t bin_size() const
{
return bins;
}
private:
std::vector<data_file_t> groups;
blt::size_t bins;
};
struct dataset_partitioner
{
public:
explicit dataset_partitioner(const data_file_t& file)
{
with(file);
}
dataset_partitioner& with(const data_file_t& data)
{
BLT_ASSERT(data.data_points.begin()->bins.size() == files.begin()->data_points.begin()->bins.size());
files.push_back(data);
return *this;
}
[[nodiscard]] partitioned_dataset_t partition(blt::size_t groups) const;
private:
std::vector<data_file_t> files;
};
void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data);
}
#endif //COSC_4P80_ASSIGNMENT_3_FILE_H

@ -1 +1 @@
Subproject commit 78604708fa52beab06efac5f49fe60f6df2bd588 Subproject commit a9339280a05f432e41005c2a964767cb37f692a7

206
src/file.cpp Normal file
View File

@ -0,0 +1,206 @@
/*
* <Short Description>
* Copyright (C) 2024 Brett Terpstra
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <assign3/file.h>
#include <blt/std/string.h>
#include <blt/std/random.h>
#include <blt/fs/loader.h>
#include <filesystem>
#include <cmath>
#include <fstream>
#include <algorithm>
#include <random>
#include "blt/iterator/enumerate.h"
namespace assign3
{
// Recursively walks `path` and returns the *contents* of every ".out" file
// found (note: file bodies, not file paths, despite the function's name).
std::vector<std::string> data_file_t::get_data_file_list(std::string_view path)
{
    std::vector<std::string> contents;
    for (const auto& entry : std::filesystem::recursive_directory_iterator(path))
    {
        if (entry.is_directory())
            continue;
        const auto entry_path = entry.path().string();
        if (blt::string::ends_with(entry_path, ".out"))
            contents.push_back(blt::fs::getFile(entry_path));
    }
    return contents;
}
// Parses each raw file body (as produced by get_data_file_list) into a data_file_t.
// Expected format: first line is "<point_count> <bin_count>"; every following
// line is one sample: "<is_bad flag> <bin value> * bin_count" (flag 1 = bad).
// Lines whose field count does not match bin_count + 1 are silently skipped.
std::vector<data_file_t> data_file_t::load_data_files(const std::vector<std::string>& files)
{
    std::vector<data_file_t> loaded_data;
    // parse each file body (taken by copy: replaceAll mutates it in place)
    for (auto file : files)
    {
        // we only use unix line endings here...
        blt::string::replaceAll(file, "\r", "");
        auto lines = blt::string::split(file, "\n");
        auto line_it = lines.begin();
        // header line: "<point_count> <bin_count>"
        auto meta = blt::string::split(*line_it, ' ');
        // load data inside files
        data_file_t data;
        data.data_points.reserve(std::stoll(meta[0]));
        auto bin_count = std::stoul(meta[1]);
        for (++line_it; line_it != lines.end(); ++line_it)
        {
            auto line_data_meta = blt::string::split(*line_it, ' ');
            // skip malformed / trailing lines with the wrong field count
            if (line_data_meta.size() != bin_count + 1)
                continue;
            auto line_data_it = line_data_meta.begin();
            // first field is the is_bad flag; the remaining fields are the bins
            data_t line_data;
            line_data.is_bad = std::stoi(*line_data_it) == 1;
            line_data.bins.reserve(bin_count);
            for (++line_data_it; line_data_it != line_data_meta.end(); ++line_data_it)
                line_data.bins.push_back(std::stof(*line_data_it));
            data.data_points.push_back(line_data);
        }
        loaded_data.push_back(data);
    }
    return loaded_data;
}
// Convenience wrapper: discover, read, and parse every ".out" file under `path`.
std::vector<data_file_t> data_file_t::load_data_files_from_path(std::string_view path)
{
    const auto file_contents = get_data_file_list(path);
    return load_data_files(file_contents);
}
// Returns a copy of this sample whose bin vector is extended with
// `padding_value` until it holds `desired_size` entries. A sample that is
// already at least `desired_size` long is returned unchanged.
//
// Fixes an inverted subtraction in the previous version
// (bins.size() - desired_size), which meant padding was never applied when
// the vector was smaller than the target — the entire point of the function.
data_t data_t::with_padding(blt::size_t desired_size, Scalar padding_value) const
{
    data_t data = *this;
    if (data.bins.size() < desired_size)
        data.bins.resize(desired_size, padding_value);
    return data;
}
// Returns a copy of this sample scaled to unit Euclidean length.
// An all-zero (or empty) bin vector is returned unchanged instead of
// dividing by zero (which previously produced NaN/inf bins).
data_t data_t::normalize() const
{
    data_t data = *this;
    Scalar total = 0;
    for (const auto v : data.bins)
        total += v * v;
    const Scalar mag = std::sqrt(total);
    if (mag > 0)
    {
        for (auto& v : data.bins)
            v /= mag;
    }
    return data;
}
// Returns a copy of this file with every data point normalized.
data_file_t data_file_t::normalize() const
{
    data_file_t result = *this;
    std::transform(result.data_points.begin(), result.data_points.end(), result.data_points.begin(),
                   [](const data_t& point) { return point.normalize(); });
    return result;
}
// Returns a copy of this file with every data point padded to `desired_size`
// bins using `padding_value` (see data_t::with_padding).
data_file_t data_file_t::with_padding(blt::size_t desired_size, Scalar padding_value) const
{
    data_file_t result = *this;
    for (auto& point : result.data_points)
        point = point.with_padding(desired_size, padding_value);
    return result;
}
// Appends every point from `o` onto this file's point list.
data_file_t& data_file_t::operator+=(const data_file_t& o)
{
    data_points.reserve(data_points.size() + o.data_points.size());
    for (const auto& point : o.data_points)
        data_points.push_back(point);
    return *this;
}
// Concatenates two files' point lists into a fresh data_file_t.
data_file_t operator+(const data_file_t& a, const data_file_t& b)
{
    auto combined = a;
    combined += b;
    return combined;
}
// Splits every point from all accumulated files into `groups` partitions.
// Good and bad samples are shuffled separately and then dealt round-robin,
// so each partition receives a roughly equal share of each class.
// Precondition: groups > 0 (grouped_data[... % groups] divides by it).
partitioned_dataset_t dataset_partitioner::partition(blt::size_t groups) const
{
    // separate samples by their is_bad flag so each class can be balanced
    std::vector<data_t> good_data;
    std::vector<data_t> bad_data;
    for (const auto& f : files)
    {
        for (const auto& v : f.data_points)
        {
            if (v.is_bad)
                bad_data.push_back(v);
            else
                good_data.push_back(v);
        }
    }
    // shuffle within each class: partitions are random but class-balanced
    blt::random::random_t rand{std::random_device{}()};
    std::shuffle(good_data.begin(), good_data.end(), rand);
    std::shuffle(bad_data.begin(), bad_data.end(), rand);
    std::vector<data_file_t> grouped_data;
    grouped_data.resize(groups);
    // deal samples round-robin; insert_group carries over from the good pass
    // so the bad samples continue where the good ones left off
    blt::size_t insert_group = 0;
    for (const auto& good : good_data)
        grouped_data[insert_group++ % groups].data_points.push_back(good);
    for (const auto& bad : bad_data)
        grouped_data[insert_group++ % groups].data_points.push_back(bad);
    return partitioned_dataset_t{std::move(grouped_data)};
}
void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data)
{
std::ofstream stream{file};
stream << "epoch,";
for (auto [i, d] : blt::enumerate(data))
{
stream << d.first;
if (i != data.size() - 1)
stream << ',';
}
stream << '\n';
for (blt::size_t i = 0; i < data.begin()->second.size(); i++)
{
stream << i << ',';
for (auto [j, d] : blt::enumerate(data))
{
stream << d.second[i];
if (j != data.size() - 1)
stream << ',';
}
stream << '\n';
}
}
}