start work on post assignment due tests

dev
Brett 2024-10-30 03:20:22 -04:00
parent c7cf4721c8
commit 13aa5af131
7 changed files with 211 additions and 139 deletions

View File

@ -1,5 +1,5 @@
<component name="CopyrightManager">
<settings default="GPL3">
<settings>
<module2copyright>
<element module="All" copyright="GPL3" />
</module2copyright>

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25)
project(COSC-4P80-Assignment-2 VERSION 0.0.11)
project(COSC-4P80-Assignment-2 VERSION 0.1.0)
option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)
option(ENABLE_UBSAN "Enable the ub sanitizer" OFF)

View File

@ -21,7 +21,6 @@
#include <iostream>
#include <blt/iterator/enumerate.h>
#include <filesystem>
#ifdef BLT_USE_GRAPHICS
@ -49,17 +48,6 @@ namespace assign2
return std::cout;
}
// One labelled sample parsed from a single line of a data file.
struct data_t
{
// true when the leading field of the line parsed to 1
bool is_bad = false;
// the remaining fields of the line, each parsed as a float
std::vector<Scalar> bins;
};
// All samples parsed from one data file.
struct data_file_t
{
// one entry per accepted line; lines whose field count does not match the
// file's declared bin count are skipped by the loader
std::vector<data_t> data_points;
};
struct error_data_t
{
Scalar error;
@ -164,111 +152,6 @@ namespace assign2
std::vector<Scalar> data;
};
// Recursively walks `path` and, for every non-directory entry whose path ends
// in ".out", stores whatever blt::fs::getFile returns for that path.
//
// Note: the returned strings are not file names. load_data_files splits each
// one into lines, so they are presumably the files' text contents.
inline std::vector<std::string> get_data_files(std::string_view path)
{
    std::vector<std::string> contents;
    for (const auto& entry : std::filesystem::recursive_directory_iterator(path))
    {
        if (!entry.is_directory())
        {
            auto entry_path = entry.path().string();
            // only the ".out" files are data files
            if (blt::string::ends_with(entry_path, ".out"))
                contents.push_back(blt::fs::getFile(entry_path));
        }
    }
    return contents;
}
inline std::vector<data_file_t> load_data_files(const std::vector<std::string>& files)
{
std::vector<data_file_t> loaded_data;
// load all file
for (auto file : files)
{
// we only use unix line endings here...
blt::string::replaceAll(file, "\r", "");
auto lines = blt::string::split(file, "\n");
auto line_it = lines.begin();
auto meta = blt::string::split(*line_it, ' ');
// load data inside files
data_file_t data;
data.data_points.reserve(std::stoll(meta[0]));
auto bin_count = std::stoul(meta[1]);
for (++line_it; line_it != lines.end(); ++line_it)
{
auto line_data_meta = blt::string::split(*line_it, ' ');
if (line_data_meta.size() != bin_count + 1)
continue;
auto line_data_it = line_data_meta.begin();
// load bins
data_t line_data;
line_data.is_bad = std::stoi(*line_data_it) == 1;
line_data.bins.reserve(bin_count);
Scalar total = 0;
Scalar min = 1000;
Scalar max = 0;
for (++line_data_it; line_data_it != line_data_meta.end(); ++line_data_it)
{
auto v = std::stof(*line_data_it);
if (v > max)
max = v;
if (v < min)
min = v;
total += v * v;
line_data.bins.push_back(v);
}
// normalize vector.
total = std::sqrt(total);
//
// for (auto& v : line_data.bins)
// {
// v /= total;
// v *= 2.71828;
// v -= 2.71828 / 2;
// }
//
// if (line_data.bins.size() == 32)
// print_vec(line_data.bins) << std::endl;
data.data_points.push_back(line_data);
}
loaded_data.push_back(data);
}
return loaded_data;
}
inline void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data)
{
std::ofstream stream{file};
stream << "epoch,";
for (auto [i, d] : blt::enumerate(data))
{
stream << d.first;
if (i != data.size() - 1)
stream << ',';
}
stream << '\n';
for (blt::size_t i = 0; i < data.begin()->second.size(); i++)
{
stream << i << ',';
for (auto [j, d] : blt::enumerate(data))
{
stream << d.second[i];
if (j != data.size() - 1)
stream << ',';
}
stream << '\n';
}
}
inline bool is_thinks_bad(const std::vector<Scalar>& out)
{
return out[0] < out[1];

54
include/assign2/file.h Normal file
View File

@ -0,0 +1,54 @@
#pragma once
/*
* Copyright (C) 2024 Brett Terpstra
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef COSC_4P80_ASSIGNMENT_2_FILE_H
#define COSC_4P80_ASSIGNMENT_2_FILE_H
#include <assign2/common.h>
namespace assign2
{
// One labelled sample: a flag plus the sample's bin values.
struct data_t
{
// label of the sample; the loader sets it when a line's first field parses to 1
bool is_bad = false;
// the sample's values, one Scalar per bin
std::vector<Scalar> bins;
// Returns a copy of this sample whose bins have been divided by the
// Euclidean (L2) length of the bin vector. The original is left untouched.
[[nodiscard]] data_t normalize() const;
// Returns a copy of this sample with `padding_value` appended to its bins.
// How many elements are appended is computed in src/file.cpp from
// `desired_size` and the current bin count. The original is left untouched.
[[nodiscard]] data_t with_padding(blt::size_t desired_size, Scalar padding_value = 0) const;
};
// All samples loaded from one data file.
struct data_file_t
{
public:
// one entry per accepted line of the file
std::vector<data_t> data_points;
// Recursively scans `path` for files ending in ".out" and parses each one
// into a data_file_t. This is the only public entry point for loading.
static std::vector<data_file_t> load_data_files_from_path(std::string_view path);
private:
// Collects, for every ".out" file under `path`, the string returned by
// blt::fs::getFile (consumed as the file's text by load_data_files).
static std::vector<std::string> get_data_file_list(std::string_view path);
// Parses each string in `files` as the text of one data file.
static std::vector<data_file_t> load_data_files(const std::vector<std::string>& files);
};
// Writes the named series in `data` to `file` as CSV: a header row of
// "epoch" plus each series name, then one row per index of the first series.
void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data);
}
#endif //COSC_4P80_ASSIGNMENT_2_FILE_H

View File

@ -25,6 +25,7 @@
#include <blt/math/vectors.h>
#include <atomic>
#include <thread>
#include "file.h"
namespace assign2
{
@ -51,24 +52,24 @@ namespace assign2
};
// Program-wide metric series. The error / correct vectors are handed to
// save_as_csv by save_error_info as named CSV columns, and main() reserves
// 25000 elements for them up front so they do not reallocate while in use.
// NOTE(review): this listing is a diff view, so the *_over_time / *_of_test
// names and the *_epochs names appear to be the old and new spellings of the
// same variables — confirm against the checked-out file.
inline std::vector<Scalar> errors_over_time;
inline std::vector<Scalar> error_derivative_over_time;
inline std::vector<Scalar> error_of_test;
inline std::vector<Scalar> error_of_test_derivative;
inline std::vector<Scalar> training_error_epochs;
inline std::vector<Scalar> d_training_error_epochs;
inline std::vector<Scalar> error_derivative_of_test;
inline std::vector<Scalar> correct_over_time;
inline std::vector<Scalar> correct_over_time_test;
inline std::vector<Scalar> testing_error_epochs;
inline std::vector<Scalar> d_testing_error_epochs;
inline std::vector<Scalar> training_correct_epochs;
inline std::vector<Scalar> testing_correct_epochs;
inline std::vector<node_data> nodes;
// Writes the recorded metric series to "network" + name + ".csv" through
// save_as_csv, one labelled column per series (train/test error, their
// derivatives, and train/test correct counts).
// NOTE(review): the two save_as_csv calls below appear to be the removed and
// added sides of one statement in this diff view; the real file presumably
// contains only one of them — confirm.
// NOTE(review): this definition is not marked `inline` although the
// neighbouring globals are. If this file is a header included from more than
// one translation unit that would be a multiple-definition link error — confirm.
void save_error_info(const std::string& name)
{
save_as_csv("network" + name + ".csv", {{"train_error", errors_over_time},
{"train_d_error", error_derivative_over_time},
{"test_error", error_of_test},
{"test_d_error", error_of_test_derivative},
{"correct_train", correct_over_time},
{"correct_test", correct_over_time_test}});
save_as_csv("network" + name + ".csv", {{"train_error", training_error_epochs},
{"train_d_error", d_training_error_epochs},
{"test_error", testing_error_epochs},
{"test_d_error", d_testing_error_epochs},
{"correct_train", training_correct_epochs},
{"correct_test", testing_correct_epochs}});
}
}

133
src/file.cpp Normal file
View File

@ -0,0 +1,133 @@
// Copyright (c) 2024. Brett Terpstra
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
#include <assign2/file.h>
#include <blt/std/string.h>
#include <blt/fs/loader.h>
#include <filesystem>
#include <cmath>
#include <fstream>
namespace assign2
{
std::vector<std::string> data_file_t::get_data_file_list(std::string_view path)
{
std::vector<std::string> files;
for (const auto& file : std::filesystem::recursive_directory_iterator(path))
{
if (file.is_directory())
continue;
auto file_path = file.path().string();
if (blt::string::ends_with(file_path, ".out"))
files.push_back(blt::fs::getFile(file_path));
}
return files;
}
std::vector<data_file_t> data_file_t::load_data_files(const std::vector<std::string>& files)
{
std::vector<data_file_t> loaded_data;
// load all file
for (auto file : files)
{
// we only use unix line endings here...
blt::string::replaceAll(file, "\r", "");
auto lines = blt::string::split(file, "\n");
auto line_it = lines.begin();
auto meta = blt::string::split(*line_it, ' ');
// load data inside files
data_file_t data;
data.data_points.reserve(std::stoll(meta[0]));
auto bin_count = std::stoul(meta[1]);
for (++line_it; line_it != lines.end(); ++line_it)
{
auto line_data_meta = blt::string::split(*line_it, ' ');
if (line_data_meta.size() != bin_count + 1)
continue;
auto line_data_it = line_data_meta.begin();
// load bins
data_t line_data;
line_data.is_bad = std::stoi(*line_data_it) == 1;
line_data.bins.reserve(bin_count);
for (++line_data_it; line_data_it != line_data_meta.end(); ++line_data_it)
line_data.bins.push_back(std::stof(*line_data_it));
data.data_points.push_back(line_data);
}
loaded_data.push_back(data);
}
return loaded_data;
}
std::vector<data_file_t> data_file_t::load_data_files_from_path(std::string_view path)
{
return load_data_files(get_data_file_list(path));
}
data_t data_t::with_padding(blt::size_t desired_size, Scalar padding_value) const
{
data_t data = *this;
auto amount_to_add = static_cast<blt::ptrdiff_t>(data.bins.size()) - static_cast<blt::ptrdiff_t>(desired_size);
for (blt::ptrdiff_t i = 0; i < amount_to_add; i++)
data.bins.push_back(padding_value);
return data;
}
data_t data_t::normalize() const
{
data_t data = *this;
Scalar total = 0;
for (auto v : data.bins)
total += v * v;
Scalar mag = std::sqrt(total);
for (auto& v : data.bins)
v /= mag;
return data;
}
void save_as_csv(const std::string& file, const std::vector<std::pair<std::string, std::vector<Scalar>>>& data)
{
std::ofstream stream{file};
stream << "epoch,";
for (auto [i, d] : blt::enumerate(data))
{
stream << d.first;
if (i != data.size() - 1)
stream << ',';
}
stream << '\n';
for (blt::size_t i = 0; i < data.begin()->second.size(); i++)
{
stream << i << ',';
for (auto [j, d] : blt::enumerate(data))
{
stream << d.second[i];
if (j != data.size() - 1)
stream << ',';
}
stream << '\n';
}
}
}

View File

@ -7,6 +7,7 @@
#include <assign2/layer.h>
#include <assign2/functions.h>
#include <assign2/network.h>
#include <assign2/file.h>
#include <memory>
#include <thread>
#include <algorithm>
@ -510,7 +511,7 @@ int main(int argc, const char** argv)
std::string data_directory = blt::string::ensure_ends_with_path_separator(args.get<std::string>("file"));
data_files = load_data_files(get_data_files(data_directory));
data_files = data_file_t::load_data_files_from_path(data_directory);
if (args.contains("kfold"))
{
@ -586,12 +587,12 @@ int main(int argc, const char** argv)
}
// this is to prevent threading issues due to expanding buffers.
errors_over_time.reserve(25000);
error_derivative_over_time.reserve(25000);
correct_over_time.reserve(25000);
correct_over_time_test.reserve(25000);
error_of_test.reserve(25000);
error_of_test_derivative.reserve(25000);
training_error_epochs.reserve(25000);
d_training_error_epochs.reserve(25000);
training_correct_epochs.reserve(25000);
testing_correct_epochs.reserve(25000);
testing_error_epochs.reserve(25000);
d_testing_error_epochs.reserve(25000);
#ifdef BLT_USE_GRAPHICS
blt::gfx::init(blt::gfx::window_data{"Freeplay Graphics", init, update, 1440, 720}.setSyncInterval(1).setMonitor(glfwGetPrimaryMonitor())