file
parent
12512b4698
commit
2318f0edcf
|
@ -1,5 +1,5 @@
|
|||
cmake_minimum_required(VERSION 3.25)

# NOTE(review): the diff residue carried both the old (0.0.2) and new (0.0.3)
# project() calls; a second project() call in the same scope is invalid, so
# only the updated declaration is kept.
project(COSC-4P80-Assignment-3 VERSION 0.0.3)

# Optional sanitizer toggles, consumed elsewhere in the build scripts.
option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)
option(ENABLE_UBSAN "Enable the ub sanitizer" OFF)
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
# Development shell: `nix-shell` drops you into an environment with the
# toolchain and graphics/audio libraries listed below.
{ pkgs ? (import <nixpkgs> {
    # jetbrains.clion / segger packages are unfree; allow them explicitly
    config.allowUnfree = true;
    config.segger-jlink.acceptLicense = true;
}), ... }:
pkgs.mkShell
{
    # build-time tooling
    buildInputs = with pkgs; [
        cmake
        gcc
        clang
        emscripten
        ninja
        jetbrains.clion
        renderdoc
        valgrind
    ];
    # link/runtime libraries made available to dependents as well
    propagatedBuildInputs = with pkgs; [
        xorg.libX11
        xorg.libX11.dev
        xorg.libXcursor
        xorg.libXcursor.dev
        xorg.libXext
        xorg.libXext.dev
        xorg.libXinerama
        xorg.libXinerama.dev
        xorg.libXrandr
        xorg.libXrandr.dev
        xorg.libXrender
        xorg.libXrender.dev
        xorg.libxcb
        xorg.libxcb.dev
        xorg.libXi
        xorg.libXi.dev
        harfbuzz
        harfbuzz.dev
        zlib
        zlib.dev
        bzip2
        bzip2.dev
        pngpp
        brotli
        brotli.dev
        pulseaudio.dev
        git
        libGL
        libGL.dev
        glfw
    ];
    # NixOS convention: system OpenGL drivers live under /run/opengl-driver
    LD_LIBRARY_PATH="/run/opengl-driver/lib:/run/opengl-driver-32/lib";
}
|
|
@ -0,0 +1,109 @@
|
|||
#pragma once
|
||||
/*
|
||||
* Copyright (C) 2024 Brett Terpstra
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef COSC_4P80_ASSIGNMENT_3_FILE_H
|
||||
#define COSC_4P80_ASSIGNMENT_3_FILE_H
|
||||
|
||||
#include <blt/std/types.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include "blt/std/assert.h"
|
||||
|
||||
namespace assign3
|
||||
{
|
||||
using Scalar = float;
|
||||
|
||||
// One record of a data file: a good/bad label plus its vector of bin values.
struct data_t
{
    // true when the record is flagged as a bad sample
    // (set from the leading column of each data line — see load_data_files)
    bool is_bad = false;
    std::vector<Scalar> bins;
    
    // Returns an L2-normalized copy of this record; *this is not modified.
    [[nodiscard]] data_t normalize() const;
    
    // Returns a copy intended to be padded with padding_value up to
    // desired_size bins; *this is not modified.
    [[nodiscard]] data_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const;
};
|
||||
|
||||
struct data_file_t
|
||||
{
|
||||
public:
|
||||
std::vector<data_t> data_points;
|
||||
|
||||
[[nodiscard]] data_file_t normalize() const;
|
||||
|
||||
[[nodiscard]] data_file_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const;
|
||||
|
||||
data_file_t& operator+=(const data_file_t& o);
|
||||
|
||||
data_file_t friend operator+(const data_file_t& a, const data_file_t& b);
|
||||
|
||||
static std::vector<data_file_t> load_data_files_from_path(std::string_view path);
|
||||
|
||||
private:
|
||||
static std::vector<std::string> get_data_file_list(std::string_view path);
|
||||
|
||||
static std::vector<data_file_t> load_data_files(const std::vector<std::string>& files);
|
||||
};
|
||||
|
||||
struct partitioned_dataset_t
|
||||
{
|
||||
public:
|
||||
explicit partitioned_dataset_t(std::vector<data_file_t> groups):
|
||||
groups(std::move(groups)), bins(this->groups.begin()->data_points.begin()->bins.size())
|
||||
{}
|
||||
|
||||
[[nodiscard]] const std::vector<data_file_t>& getGroups() const
|
||||
{
|
||||
return groups;
|
||||
}
|
||||
|
||||
[[nodiscard]] blt::size_t bin_size() const
|
||||
{
|
||||
return bins;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<data_file_t> groups;
|
||||
blt::size_t bins;
|
||||
};
|
||||
|
||||
struct dataset_partitioner
|
||||
{
|
||||
public:
|
||||
explicit dataset_partitioner(const data_file_t& file)
|
||||
{
|
||||
with(file);
|
||||
}
|
||||
|
||||
dataset_partitioner& with(const data_file_t& data)
|
||||
{
|
||||
BLT_ASSERT(data.data_points.begin()->bins.size() == files.begin()->data_points.begin()->bins.size());
|
||||
files.push_back(data);
|
||||
return *this;
|
||||
}
|
||||
|
||||
[[nodiscard]] partitioned_dataset_t partition(blt::size_t groups) const;
|
||||
|
||||
private:
|
||||
std::vector<data_file_t> files;
|
||||
};
|
||||
|
||||
// Writes named value series to `file` as CSV: first column "epoch" (row
// index), one column per (name, values) pair in `data`.
void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data);
|
||||
}
|
||||
|
||||
#endif //COSC_4P80_ASSIGNMENT_3_FILE_H
|
|
@ -1 +1 @@
|
|||
Subproject commit 78604708fa52beab06efac5f49fe60f6df2bd588
|
||||
Subproject commit a9339280a05f432e41005c2a964767cb37f692a7
|
|
@ -0,0 +1,206 @@
|
|||
/*
|
||||
* <Short Description>
|
||||
* Copyright (C) 2024 Brett Terpstra
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <assign3/file.h>

#include <blt/std/string.h>
#include <blt/std/random.h>
#include <blt/fs/loader.h>
#include "blt/iterator/enumerate.h"

#include <algorithm>
#include <cmath>
#include <filesystem>
#include <fstream>
#include <random>
#include <utility>
|
||||
|
||||
namespace assign3
|
||||
{
|
||||
std::vector<std::string> data_file_t::get_data_file_list(std::string_view path)
|
||||
{
|
||||
std::vector<std::string> files;
|
||||
|
||||
for (const auto& file : std::filesystem::recursive_directory_iterator(path))
|
||||
{
|
||||
if (file.is_directory())
|
||||
continue;
|
||||
auto file_path = file.path().string();
|
||||
if (blt::string::ends_with(file_path, ".out"))
|
||||
files.push_back(blt::fs::getFile(file_path));
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
|
||||
std::vector<data_file_t> data_file_t::load_data_files(const std::vector<std::string>& files)
|
||||
{
|
||||
std::vector<data_file_t> loaded_data;
|
||||
|
||||
// load all file
|
||||
for (auto file : files)
|
||||
{
|
||||
// we only use unix line endings here...
|
||||
blt::string::replaceAll(file, "\r", "");
|
||||
auto lines = blt::string::split(file, "\n");
|
||||
auto line_it = lines.begin();
|
||||
auto meta = blt::string::split(*line_it, ' ');
|
||||
|
||||
// load data inside files
|
||||
data_file_t data;
|
||||
data.data_points.reserve(std::stoll(meta[0]));
|
||||
auto bin_count = std::stoul(meta[1]);
|
||||
|
||||
for (++line_it; line_it != lines.end(); ++line_it)
|
||||
{
|
||||
auto line_data_meta = blt::string::split(*line_it, ' ');
|
||||
if (line_data_meta.size() != bin_count + 1)
|
||||
continue;
|
||||
auto line_data_it = line_data_meta.begin();
|
||||
|
||||
// load bins
|
||||
data_t line_data;
|
||||
line_data.is_bad = std::stoi(*line_data_it) == 1;
|
||||
line_data.bins.reserve(bin_count);
|
||||
|
||||
for (++line_data_it; line_data_it != line_data_meta.end(); ++line_data_it)
|
||||
line_data.bins.push_back(std::stof(*line_data_it));
|
||||
|
||||
data.data_points.push_back(line_data);
|
||||
}
|
||||
|
||||
loaded_data.push_back(data);
|
||||
}
|
||||
|
||||
return loaded_data;
|
||||
}
|
||||
|
||||
// Convenience wrapper: read every ".out" file found recursively under `path`
// and parse each one into a data_file_t.
std::vector<data_file_t> data_file_t::load_data_files_from_path(std::string_view path)
{
    return load_data_files(get_data_file_list(path));
}
|
||||
|
||||
// Returns a copy of this record padded with padding_value up to desired_size
// bins. Records already at or above desired_size are returned unchanged
// (never truncated).
data_t data_t::with_padding(blt::size_t desired_size, Scalar padding_value) const
{
    data_t data = *this;
    // BUG FIX: the original computed bins.size() - desired_size, which is
    // negative exactly when padding is required — so padding was never added.
    auto amount_to_add = static_cast<blt::ptrdiff_t>(desired_size) - static_cast<blt::ptrdiff_t>(data.bins.size());
    for (blt::ptrdiff_t i = 0; i < amount_to_add; i++)
        data.bins.push_back(padding_value);
    return data;
}
|
||||
|
||||
// Returns an L2-normalized copy of this record (unit Euclidean length).
data_t data_t::normalize() const
{
    data_t data = *this;
    
    Scalar total = 0;
    for (auto v : data.bins)
        total += v * v;
    Scalar mag = std::sqrt(total);
    // robustness fix: an all-zero (or empty) record has zero magnitude;
    // dividing would fill the bins with NaN — return the copy unchanged instead
    if (mag == 0)
        return data;
    for (auto& v : data.bins)
        v /= mag;
    return data;
}
|
||||
|
||||
// Returns a copy of this file in which every record has been normalized.
data_file_t data_file_t::normalize() const
{
    data_file_t result = *this;
    std::transform(result.data_points.begin(), result.data_points.end(), result.data_points.begin(),
                   [](const data_t& point) { return point.normalize(); });
    return result;
}
|
||||
|
||||
// Returns a copy of this file in which every record has been padded to
// desired_size bins with padding_value.
data_file_t data_file_t::with_padding(blt::size_t desired_size, Scalar padding_value) const
{
    data_file_t result = *this;
    std::transform(result.data_points.begin(), result.data_points.end(), result.data_points.begin(),
                   [desired_size, padding_value](const data_t& point) {
                       return point.with_padding(desired_size, padding_value);
                   });
    return result;
}
|
||||
|
||||
// Appends the other file's records after this file's own records.
data_file_t& data_file_t::operator+=(const data_file_t& o)
{
    const auto& incoming = o.data_points;
    data_points.insert(data_points.end(), incoming.begin(), incoming.end());
    return *this;
}
|
||||
|
||||
// Concatenates the records of two files into a new file.
// Consistency: implemented in terms of operator+= (the original duplicated
// the insert logic, which could drift from += over time).
data_file_t operator+(const data_file_t& a, const data_file_t& b)
{
    data_file_t file = a;
    file += b;
    return file;
}
|
||||
|
||||
// Splits all accumulated records into `groups` groups: records are separated
// by class, shuffled within each class, then dealt round-robin so every group
// receives a near-equal share of each class.
partitioned_dataset_t dataset_partitioner::partition(blt::size_t groups) const
{
    // robustness fix: `% groups` below is UB for groups == 0
    BLT_ASSERT(groups > 0);
    
    std::vector<data_t> good_data;
    std::vector<data_t> bad_data;
    
    for (const auto& f : files)
    {
        for (const auto& v : f.data_points)
        {
            if (v.is_bad)
                bad_data.push_back(v);
            else
                good_data.push_back(v);
        }
    }
    
    blt::random::random_t rand{std::random_device{}()};
    
    // shuffle within each class so group membership is random
    std::shuffle(good_data.begin(), good_data.end(), rand);
    std::shuffle(bad_data.begin(), bad_data.end(), rand);
    
    std::vector<data_file_t> grouped_data;
    grouped_data.resize(groups);
    
    // deal round-robin; insert_group carries over from good to bad records so
    // the two passes stay balanced across groups (as in the original)
    blt::size_t insert_group = 0;
    for (auto& good : good_data)
        grouped_data[insert_group++ % groups].data_points.push_back(std::move(good));
    
    for (auto& bad : bad_data)
        grouped_data[insert_group++ % groups].data_points.push_back(std::move(bad));
    
    return partitioned_dataset_t{std::move(grouped_data)};
}
|
||||
|
||||
void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data)
|
||||
{
|
||||
std::ofstream stream{file};
|
||||
stream << "epoch,";
|
||||
for (auto [i, d] : blt::enumerate(data))
|
||||
{
|
||||
stream << d.first;
|
||||
if (i != data.size() - 1)
|
||||
stream << ',';
|
||||
}
|
||||
stream << '\n';
|
||||
for (blt::size_t i = 0; i < data.begin()->second.size(); i++)
|
||||
{
|
||||
stream << i << ',';
|
||||
for (auto [j, d] : blt::enumerate(data))
|
||||
{
|
||||
stream << d.second[i];
|
||||
if (j != data.size() - 1)
|
||||
stream << ',';
|
||||
}
|
||||
stream << '\n';
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue