diff --git a/.idea/editor (conflicted copy 2024-11-28 121102).xml b/.idea/editor (conflicted copy 2024-11-28 121102).xml
new file mode 100644
index 0000000..b0d69ef
--- /dev/null
+++ b/.idea/editor (conflicted copy 2024-11-28 121102).xml
@@ -0,0 +1,483 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/editor.xml b/.idea/editor.xml
index b0d69ef..5fff85e 100644
--- a/.idea/editor.xml
+++ b/.idea/editor.xml
@@ -240,244 +240,5 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index b19fe85..b7898c9 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -3,6 +3,7 @@
+
diff --git a/.idea/workspace (conflicted copy 2024-11-28 121026).xml b/.idea/workspace (conflicted copy 2024-11-28 121026).xml
new file mode 100644
index 0000000..7c73077
--- /dev/null
+++ b/.idea/workspace (conflicted copy 2024-11-28 121026).xml
@@ -0,0 +1,257 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {
+ "useNewFormat": true
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {
+ "associatedIndex": 7
+}
+
+
+
+
+
+ {
+ "keyToString": {
+ "CMake Application.Assign3 Tests.executor": "Run",
+ "CMake Application.COSC-4P80-Assignment-3.executor": "Run",
+ "NIXITCH_NIXPKGS_CONFIG": "/etc/nix/nixpkgs-config.nix",
+ "NIXITCH_NIX_CONF_DIR": "",
+ "NIXITCH_NIX_OTHER_STORES": "",
+ "NIXITCH_NIX_PATH": "/home/brett/.nix-defexpr/channels:nixpkgs=/nix/var/nix/profiles/per-user/root/channels/nixos:nixos-config=/etc/nixos/configuration.nix:/nix/var/nix/profiles/per-user/root/channels",
+ "NIXITCH_NIX_PROFILES": "/run/current-system/sw /nix/var/nix/profiles/default /etc/profiles/per-user/brett /home/brett/.local/state/nix/profile /nix/profile /home/brett/.nix-profile",
+ "NIXITCH_NIX_REMOTE": "",
+ "NIXITCH_NIX_USER_PROFILE_DIR": "/nix/var/nix/profiles/per-user/brett",
+ "RunOnceActivity.RadMigrateCodeStyle": "true",
+ "RunOnceActivity.ShowReadmeOnStart": "true",
+ "RunOnceActivity.cidr.known.project.marker": "true",
+ "RunOnceActivity.readMode.enableVisualFormatting": "true",
+ "RunOnceActivity.west.config.association.type.startup.service": "true",
+ "SHARE_PROJECT_CONFIGURATION_FILES": "true",
+ "cf.advertisement.text.has.clang-format": "true",
+ "cf.first.check.clang-format": "false",
+ "cidr.known.project.marker": "true",
+ "git-widget-placeholder": "main",
+ "last_opened_file_path": "/home/brett/Documents/Brock/CS 4P80/COSC-4P80-Assignment-3",
+ "node.js.detected.package.eslint": "true",
+ "node.js.detected.package.tslint": "true",
+ "node.js.selected.package.eslint": "(autodetect)",
+ "node.js.selected.package.tslint": "(autodetect)",
+ "nodejs_package_manager_path": "npm",
+ "run.code.analysis.last.selected.profile": "pProject Default",
+ "settings.editor.selected.configurable": "editor.preferences.completion",
+ "structure.view.defaults.are.configured": "true",
+ "vue.rearranger.settings.migration": "true"
+ }
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1730483030448
+
+
+ 1730483030448
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f9180b4..0ed89e5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25)
-project(COSC-4P80-Assignment-3 VERSION 0.0.31)
+project(COSC-4P80-Assignment-3 VERSION 0.0.32)
include(FetchContent)
option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)
diff --git a/plot_line_graph.py b/plot_line_graph.py
index c0cfa97..0c064cc 100644
--- a/plot_line_graph.py
+++ b/plot_line_graph.py
@@ -53,7 +53,7 @@ else:
plt.ylim(0, 1)
plt.suptitle("Topological Error (Bins: {})".format(bins), fontsize=16)
- plt.title(subtitle1, fontsize=11)
+ plt.title(subtitle1, fontsize=10)
plt.savefig("errors-topological{}.png".format(bins))
@@ -64,7 +64,7 @@ else:
plt.ylim(y_min, y_max)
plt.suptitle("Quantization Error (Bins: {})".format(bins), fontsize=16)
- plt.title(subtitle2, fontsize=11)
+ plt.title(subtitle2, fontsize=10)
plt.savefig("errors-quantization{}.png".format(bins))
diff --git a/src/main.cpp b/src/main.cpp
index f64996e..c9ec951 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -11,6 +11,8 @@
#include
#include
#include
+#include
+#include
void plot_heatmap(const std::string& path, const std::string& activations_csv, const blt::size_t bin_size, const std::string& subtitle)
{
@@ -110,6 +112,21 @@ void action_start_graphics(const std::vector& argv_vector)
blt::gfx::init(blt::gfx::window_data{"My Sexy Window", init, update, destroy}.setSyncInterval(1).setMaximized(true));
}
+void write_csv(const std::vector& vec, const std::string& path, const std::string& header = "") // Writes `header` as the first line of the file at `path`, then every element of `vec` on its own line.
+{
+ std::ofstream stream{path}; // the stream state is not checked after opening, so a failed open goes unnoticed here
+ stream << header << std::endl; // a first line is always emitted, even when `header` is empty
+ for (const auto v : vec)
+ stream << v << std::endl; // std::endl also flushes the stream after each value
+}
+
+void load_csv(std::vector& vec, const std::string& path) // Appends to `vec` one value parsed with std::stof from each line of the file at `path` that remains after skip(1).
+{
+ auto lines = blt::fs::getLinesFromFile(path); // presumably returns the file's lines in order -- confirm against blt::fs
+ for (const auto& [i, line] : blt::enumerate(lines).skip(1)) // skip(1) presumably drops the first (header) line; the index `i` is not used
+ vec.push_back(std::stof(line)); // std::stof throws std::invalid_argument / std::out_of_range for text it cannot convert (e.g. an empty line)
+}
+
struct task_t // NOLINT
{
data_file_t* file;
@@ -132,6 +149,61 @@ struct task_t // NOLINT
std::vector> activations{};
};
+struct sortable_data_t // One sample value tagged with a path and a rank; every comparison operator below looks at `value` only.
+{
+ std::string_view path; // non-owning view: the characters it refers to must outlive this object
+ Scalar value; // the only member the comparison operators read
+ blt::u32 rank; // stored by the constructor; not read by any operator in this struct
+
+ sortable_data_t(const std::string_view& path, Scalar value, blt::u32 rank): path(path), value(value), rank(rank)
+ {
+ }
+
+ friend bool operator==(const sortable_data_t& lhs, const sortable_data_t& rhs) // equal when the values are equal, regardless of path or rank
+ {
+ return lhs.value == rhs.value; // exact comparison, no tolerance
+ }
+
+ friend bool operator!=(const sortable_data_t& lhs, const sortable_data_t& rhs) // negation of operator==
+ {
+ return !(lhs == rhs);
+ }
+
+ friend bool operator<(const sortable_data_t& lhs, const sortable_data_t& rhs) // orders by value; the remaining relational operators are derived from this one
+ {
+ return lhs.value < rhs.value;
+ }
+
+ friend bool operator<=(const sortable_data_t& lhs, const sortable_data_t& rhs)
+ {
+ return !(rhs < lhs);
+ }
+
+ friend bool operator>(const sortable_data_t& lhs, const sortable_data_t& rhs)
+ {
+ return rhs < lhs;
+ }
+
+ friend bool operator>=(const sortable_data_t& lhs, const sortable_data_t& rhs)
+ {
+ return !(lhs < rhs);
+ }
+};
+
+std::string make_path(const task_t& task) // Builds the relative directory "bins-<bin count>/<width>x<height>-<max_epochs>/<shape>/<init>-<initial_learn_rate>/" for `task`.
+{
+ std::stringstream paths;
+ paths << "bins-" << task.file->data_points.begin()->bins.size() << "/"; // dereferences begin() unchecked: needs a valid `file` holding at least one data point
+ paths << task.width << "x" << task.height << '-' << task.max_epochs << '/';
+ std::string shape_name = shape_names[static_cast(task.shape)]; // local copy; the replaceAll below is applied to this copy
+ std::string init_name = init_names[static_cast(task.init)];
+ blt::string::replaceAll(shape_name, " ", "-"); // presumably replaces in place (the return value is ignored): spaces become '-'
+ blt::string::replaceAll(init_name, " ", "-");
+ paths << shape_name << '/';
+ paths << init_name << '-' << task.initial_learn_rate << '/';
+ return paths.str(); // ends with '/', so a file name can be appended directly
+}
+
void action_test(const std::vector& argv_vector)
{
blt::arg_parse parser{};
@@ -149,6 +221,9 @@ void action_test(const std::vector& argv_vector)
std::vector threads;
std::mutex task_mutex;
+ // tasks.emplace_back(&data.files.back(), 5, 5, 2000, shape_t::GRID, init_t::COMPLETELY_RANDOM, 1);
+ // tasks.emplace_back(&data.files.back(), 5, 5, 2000, shape_t::GRID, init_t::RANDOM_DATA, 1);
+ // tasks.emplace_back(&data.files.back(), 5, 5, 2000, shape_t::GRID, init_t::SAMPLED_DATA, 1);
for (auto& file : data.files)
{
for (blt::u32 size = 5; size <= 7; size++)
@@ -181,33 +256,28 @@ void action_test(const std::vector& argv_vector)
tasks.pop_back();
}
- for (blt::size_t run = 0; run < runs; run++)
+ bool do_run = false;
+ if (do_run)
{
- gaussian_function_t func{};
- auto dist = distance_function_t::from_shape(task.shape, task.width, task.height);
- auto som = std::make_unique(*task.file, task.width, task.height, task.max_epochs, dist.get(),
- &task.topology_func, task.shape, task.init, false);
- while (som->get_current_epoch() < som->get_max_epochs())
- som->train_epoch(task.initial_learn_rate);
+ for (blt::size_t run = 0; run < runs; run++)
+ {
+ gaussian_function_t func{};
+ auto dist = distance_function_t::from_shape(task.shape, task.width, task.height);
+ auto som = std::make_unique(*task.file, task.width, task.height, task.max_epochs, dist.get(),
+ &task.topology_func, task.shape, task.init, false);
+ while (som->get_current_epoch() < som->get_max_epochs())
+ som->train_epoch(task.initial_learn_rate);
- task.topological_errors.push_back(som->get_topological_errors());
- task.quantization_errors.push_back(som->get_quantization_errors());
+ task.topological_errors.push_back(som->get_topological_errors());
+ task.quantization_errors.push_back(som->get_quantization_errors());
- std::vector acts;
- for (const auto& v : som->get_array().get_map())
- acts.push_back(v.get_activation());
- task.activations.emplace_back(std::move(acts));
+ std::vector acts;
+ for (const auto& v : som->get_array().get_map())
+ acts.push_back(v.get_activation());
+ task.activations.emplace_back(std::move(acts));
+ }
}
- std::stringstream paths;
- paths << "bins-" << task.file->data_points.begin()->bins.size() << "/";
- paths << task.width << "x" << task.height << '-' << task.max_epochs << '/';
- std::string shape_name = shape_names[static_cast(task.shape)];
- blt::string::replaceAll(shape_name, " ", "-");
- paths << shape_name << '/';
- std::string init_name = init_names[static_cast(task.init)];
- blt::string::replaceAll(init_name, " ", "-");
- paths << init_name << '-' << task.initial_learn_rate << '/';
- auto path = paths.str();
+ auto path = make_path(task);
std::filesystem::create_directories(path);
std::vector average_topological_errors;
@@ -217,56 +287,81 @@ void action_test(const std::vector& argv_vector)
std::vector stddev_quantization_errors;
std::vector min_topological_errors;
std::vector min_quantization_errors;
+ std::vector last_topological_errors;
+ std::vector last_quantization_errors;
- average_topological_errors.resize(task.topological_errors.begin()->size());
- average_quantization_errors.resize(task.quantization_errors.begin()->size());
- average_activations.resize(task.activations.begin()->size());
- stddev_topological_errors.resize(task.topological_errors.begin()->size());
- stddev_quantization_errors.resize(task.quantization_errors.begin()->size());
-
- min_topological_errors.resize(runs);
- min_quantization_errors.resize(runs);
-
- for (auto [i, v] : blt::enumerate(task.topological_errors))
- min_topological_errors[i] = *std::min_element(v.begin(), v.end());
- for (auto [i, v] : blt::enumerate(task.quantization_errors))
- min_quantization_errors[i] = *std::min_element(v.begin(), v.end());
-
- for (const auto& vec : task.topological_errors)
- for (auto [index, v] : blt::enumerate(vec))
- average_topological_errors[index] += v;
- for (const auto& vec : task.quantization_errors)
- for (auto [index, v] : blt::enumerate(vec))
- average_quantization_errors[index] += v;
- for (const auto& vec : task.activations)
- for (auto [index, v] : blt::enumerate(vec))
- average_activations[index] += v;
-
- // calculate mean per point
- for (auto& v : average_topological_errors)
- v /= static_cast(runs);
- for (auto& v : average_quantization_errors)
- v /= static_cast(runs);
-
- for (auto [i, mean] : blt::in_pairs(average_topological_errors, average_quantization_errors).enumerate())
+ if (do_run)
{
- auto [t_mean, q_mean] = mean;
- float variance_t = 0;
- float variance_q = 0;
+ average_topological_errors.resize(task.topological_errors.begin()->size());
+ average_quantization_errors.resize(task.quantization_errors.begin()->size());
+ average_activations.resize(task.activations.begin()->size());
+ stddev_topological_errors.resize(task.topological_errors.begin()->size());
+ stddev_quantization_errors.resize(task.quantization_errors.begin()->size());
+
+ min_topological_errors.resize(runs);
+ min_quantization_errors.resize(runs);
+ last_topological_errors.resize(runs);
+ last_quantization_errors.resize(runs);
+
+ for (auto [i, v] : blt::enumerate(task.topological_errors))
+ {
+ min_topological_errors[i] = *std::min_element(v.begin(), v.end());
+ last_topological_errors[i] = v.back();
+ }
+ for (auto [i, v] : blt::enumerate(task.quantization_errors))
+ {
+ min_quantization_errors[i] = *std::min_element(v.begin(), v.end());
+ last_quantization_errors[i] = v.back();
+ }
+
for (const auto& vec : task.topological_errors)
- {
- auto d = vec[i] - t_mean;
- variance_t += d * d;
- }
+ for (auto [index, v] : blt::enumerate(vec))
+ average_topological_errors[index] += v;
for (const auto& vec : task.quantization_errors)
+ for (auto [index, v] : blt::enumerate(vec))
+ average_quantization_errors[index] += v;
+ for (const auto& vec : task.activations)
+ for (auto [index, v] : blt::enumerate(vec))
+ average_activations[index] += v;
+
+ // calculate mean per point
+ for (auto& v : average_topological_errors)
+ v /= static_cast(runs);
+ for (auto& v : average_quantization_errors)
+ v /= static_cast(runs);
+
+ for (auto [i, mean] : blt::in_pairs(average_topological_errors, average_quantization_errors).enumerate())
{
- auto d = vec[i] - q_mean;
- variance_q += d * d;
+ auto [t_mean, q_mean] = mean;
+ float variance_t = 0;
+ float variance_q = 0;
+ for (const auto& vec : task.topological_errors)
+ {
+ auto d = vec[i] - t_mean;
+ variance_t += d * d;
+ }
+ for (const auto& vec : task.quantization_errors)
+ {
+ auto d = vec[i] - q_mean;
+ variance_q += d * d;
+ }
+ variance_t /= static_cast(runs);
+ variance_q /= static_cast(runs);
+ stddev_topological_errors[i] = std::sqrt(variance_t);
+ stddev_quantization_errors[i] = std::sqrt(variance_q);
}
- variance_t /= static_cast(runs);
- variance_q /= static_cast(runs);
- stddev_topological_errors[i] = std::sqrt(variance_t);
- stddev_quantization_errors[i] = std::sqrt(variance_q);
+ }
+ else
+ {
+ load_csv(average_topological_errors, path + "topological_avg.csv");
+ load_csv(average_quantization_errors, path + "quantization_avg.csv");
+ load_csv(average_activations, path + "activations_avg.csv");
+ load_csv(stddev_topological_errors, path + "topological_stddev.csv");
+ load_csv(stddev_quantization_errors, path + "quantization_stddev.csv");
+ load_csv(min_topological_errors, path + "min_topological.csv");
+ load_csv(min_quantization_errors, path + "min_quantization.csv");
+ load_csv(last_topological_errors, path + "last_topological.csv");
+ load_csv(last_quantization_errors, path + "last_quantization.csv");
}
Scalar avg_quantization_stddev = 0;
@@ -289,6 +384,7 @@ void action_test(const std::vector& argv_vector)
auto min_topo = *std::min_element(average_topological_errors.begin(), average_topological_errors.end());
auto max_topo = *std::max_element(average_topological_errors.begin(), average_topological_errors.end());
+ if (do_run)
{
std::ofstream topological{path + "topological_avg.csv"};
std::ofstream quantization{path + "quantization_avg.csv"};
@@ -296,19 +392,20 @@ void action_test(const std::vector& argv_vector)
std::ofstream activations{path + "activations.csv"};
std::ofstream topological_stddev{path + "topological_stddev.csv"};
std::ofstream quantization_stddev{path + "quantization_stddev.csv"};
- std::ofstream min_topological{path + "min_topological.csv"};
- std::ofstream min_quantization{path + "min_quantization.csv"};
+
+ write_csv(min_topological_errors, path + "min_topological.csv");
+ write_csv(min_quantization_errors, path + "min_quantization.csv");
+ write_csv(last_topological_errors, path + "last_topological.csv");
+ write_csv(last_quantization_errors, path + "last_quantization.csv");
topological_stddev << "Average topological stddev: " << avg_topological_stddev << std::endl;
quantization_stddev << "Average quantization stddev: " << avg_quantization_stddev << std::endl;
- min_topological << "Min Errors\n";
- min_quantization << "Min Errors\n";
// topological_stddev << "Stddev Over Epochs: " << std::endl;
// quantization_stddev << "Stddev Over Epochs: " << std::endl;
for (auto v : stddev_topological_errors)
topological_stddev << v << std::endl;
- for (auto v : quantization_stddev)
+ for (auto v : stddev_quantization_errors)
quantization_stddev << v << std::endl;
topological << "error\n";
@@ -339,6 +436,11 @@ void action_test(const std::vector& argv_vector)
}
}
+ std::string shape_name = shape_names[static_cast(task.shape)];
+ std::string init_name = init_names[static_cast(task.init)];
+ blt::string::replaceAll(shape_name, " ", "-");
+ blt::string::replaceAll(init_name, " ", "-");
+
plot_heatmap(path, "activations.csv", task.file->data_points.front().bins.size(),
std::to_string(task.width) + "x" + std::to_string(task.height) + " " += shape_name + ", " += init_name + ", " +
std::to_string(
@@ -347,21 +449,11 @@ void action_test(const std::vector& argv_vector)
plot_line_graph(path, "topological_avg.csv", "quantization_avg.csv", task.file->data_points.front().bins.size(),
std::to_string(task.width) + "x" + std::to_string(task.height) + " " += shape_name + ", " += init_name + ", Min: " +
- std::to_string(
- min_topo) +
- ", Max: " +
- std::to_string(
- max_topo) +
- ", " + std::to_string(
- task.max_epochs) + " Epochs",
+ std::to_string(min_topo) + ", Max: " + std::to_string(max_topo) +
+ ", " + std::to_string(task.max_epochs) + " Epochs",
std::to_string(task.width) + "x" + std::to_string(task.height) + " " += shape_name + ", " += init_name + ", Min: " +
- std::to_string(
- min_quant) +
- ", Max: " +
- std::to_string(
- max_quant) +
- ", " + std::to_string(
- task.max_epochs) +
+ std::to_string(min_quant) + ", Max: " +
+ std::to_string(max_quant) +", " + std::to_string(task.max_epochs) +
" Epochs");
BLT_INFO("Task '%s' Complete", path.c_str());
@@ -380,12 +472,251 @@ void action_test(const std::vector& argv_vector)
}
}
+struct man_whitney_t // Result record filled in by do_man_whitney for one pair of populations (presumably a Mann-Whitney U test).
+{
+ Scalar u1 = 0, u2 = 0; // U statistic computed for population 1 and for population 2
+ Scalar U = 0, meanU = 0, sigmaU = 0; // U = min(u1, u2); meanU and sigmaU are used to standardise it
+ Scalar z = 0, r = 0; // z = (U - meanU) / sigmaU; r = |z| / sqrt(n1 + n2)
+
+ std::string name1, name2; // the two population paths that were passed to do_man_whitney
+};
+
+struct test_t // A group of tasks that action_convert handles as one unit of work, plus a path string.
+{
+ std::vector tasks;
+ std::string path; // action_convert passes "UnUsed" here and never reads the member
+
+ test_t() = default; // action_convert default-constructs a test_t before assigning one taken from its queue
+
+ test_t(const std::vector& tasks, std::string path) // copies `tasks`; `path` is taken by value and moved into the member
+ : tasks(tasks), path(std::move(path))
+ {
+ }
+};
+
+double cumulativeNormal(const double x) // Returns 2 * Phi(x), where Phi is the standard normal CDF; despite the name this is not the plain CDF.
+{
+ // two tailed: both terms below equal Phi(x), so the sum is a valid two-tailed p-value only for x <= 0 (x > 0 gives a result above 1)
+ return 0.5 * std::erfc(-x * M_SQRT1_2) + (1.0 - 0.5 * std::erfc(x * M_SQRT1_2)); // NOTE(review): for x below roughly -8 the second term rounds to 0 and the result is halved; std::erfc(std::abs(x) * M_SQRT1_2) would avoid this and the x > 0 case
+}
+
+man_whitney_t do_man_whitney(const std::string& pop1_path, const std::string& pop2_path, const std::vector& pop1,
+ const std::vector& pop2) // Pools and ranks the samples of pop1 and pop2, then derives U, z and the effect size r from the two rank sums.
+{
+ std::vector data; // pooled copy of both populations
+ data.insert(data.end(), pop1.begin(), pop1.end());
+ data.insert(data.end(), pop2.begin(), pop2.end());
+
+ std::sort(data.begin(), data.end()); // ascending by value (sortable_data_t::operator<)
+
+ Scalar T1 = 0, T2 = 0; // rank sums of population 1 and population 2
+ const auto n1 = static_cast(pop1.size()), n2 = static_cast(pop2.size());
+
+ blt::u32 rank = 1; // next rank to hand out; ranks start at 1
+ for (auto it = data.begin(); it != data.end();) // each iteration consumes one run of equal values
+ {
+ const auto begin = it;
+ blt::size_t total_count = 1; // number of elements in the current run of ties
+ blt::u32 total_rank = rank++; // sum of the ranks covered by the current run
+ while ((it + 1) != data.end() && *begin == *(it + 1)) // extend the run while the next element equals its first element
+ {
+ ++total_count;
+ total_rank += rank++;
+ ++it;
+ }
+ ++it; // now one past the end of the run
+ for (auto it2 = begin; it2 != it; ++it2) // every member of the run receives the average rank total_rank / total_count
+ {
+ if (it2->path == pop1_path) // membership is decided by the stored path; if both paths were equal everything would be added to T1
+ T1 += static_cast(total_rank) / static_cast(total_count);
+ else if (it2->path == pop2_path)
+ T2 += static_cast(total_rank) / static_cast(total_count);
+ else
+ BLT_ABORT(("Impossible Path " + std::string(it2->path)).c_str()); // a sample whose path matches neither population
+ }
+ }
+
+ man_whitney_t man;
+ man.u1 = n1 * n2 + ((n1 * (n1 + 1)) / 2) - T1;
+ man.u2 = n1 * n2 + ((n2 * (n2 + 1)) / 2) - T2;
+ man.U = std::min(man.u1, man.u2);
+ man.meanU = (n1 * n2) / 2;
+ man.sigmaU = std::sqrt((n1 * n2 * (n1 + n2 + 1)) / 12); // NOTE(review): no correction term for ties; evaluates to 0 when either population is empty
+ man.z = (man.U - man.meanU) / man.sigmaU; // divides by zero when sigmaU is 0
+ man.r = std::abs(man.z) / std::sqrt(n1 + n2);
+
+ man.name1 = pop1_path;
+ man.name2 = pop2_path;
+
+ return man;
+}
+
void action_convert(const std::vector& argv_vector) // Loads the data files, then compares the last_topological.csv results of each task group pairwise with do_man_whitney and writes LaTeX to stats/.../results_table.txt.
{
blt::arg_parse parser{};
parser.setHelpExtras("convert");
+ parser.addArgument(blt::arg_builder{"--file", "-f"}
+ .setDefault("../data")
+ .setHelp("Path to data files").build());
+
auto args = parser.parse_args(argv_vector);
+
+ load_data_files(args.get("file"));
+
+ std::vector threads;
+ std::vector tasks; // work queue of test_t groups; workers access it under task_mutex
+ std::mutex task_mutex;
+
+ for (auto& file : data.files)
+ {
+ for (blt::u32 i = 5; i <= 7; i++) // `i` is used as both width and height: 5x5, 6x6 and 7x7
+ {
+ for (blt::i32 shape = 0; shape < 4; shape++) // shape values 0..3, converted by the static_cast below
+ {
+ auto shape_v = static_cast(shape);
+ tasks.emplace_back(std::vector{ // one group per (file, size, shape); its three tasks differ only in the init mode
+ task_t{&file, i, i, 2000, shape_v, init_t::COMPLETELY_RANDOM, 1.0},
+ task_t{&file, i, i, 2000, shape_v, init_t::RANDOM_DATA, 1.0},
+ task_t{&file, i, i, 2000, shape_v, init_t::SAMPLED_DATA, 1.0}
+ }, "UnUsed");
+ }
+ }
+ }
+
+ for (blt::size_t i = 0; i < std::thread::hardware_concurrency(); i++) // NOTE(review): hardware_concurrency() may return 0, in which case no worker is started
+ {
+ threads.emplace_back([&]() // captures by reference; all workers are joined at the end of this function
+ {
+ while (true)
+ {
+ test_t t;
+ {
+ std::unique_lock lock(task_mutex); // held only while one group is taken off the queue
+ if (tasks.empty())
+ break;
+ t = tasks.back();
+ tasks.pop_back();
+ }
+
+ std::vector paths;
+ blt::hashmap_t> data; // NOTE(review): shadows the outer `data` that is used for data.files above
+ blt::hashmap_t task_data;
+ for (const auto& task : t.tasks)
+ {
+ auto path = make_path(task) + "last_topological.csv"; // only last_topological.csv is read; last_quantization.csv is not used here
+ paths.push_back(path);
+ auto lines = blt::fs::getLinesFromFile(path);
+ for (const auto& line : blt::iterate(lines).skip(1)) // skip(1) presumably drops the header line
+ data[path].emplace_back(paths.back(), std::stof(line), 0); // NOTE(review): sortable_data_t keeps a string_view of paths.back(); a later push_back may reallocate `paths` -- confirm the views stay valid
+ task_data[path] = &task; // points into t.tasks, which lives until the end of this loop iteration
+ }
+
+ std::string same = task_data.begin()->first; // start from one path, then overwrite every character that differs in another path with '%'
+ for (const auto& task : task_data)
+ {
+ for (auto [i, c] : blt::enumerate(task.first))
+ {
+ if (i < same.length() && same[i] != task.first[i])
+ same[i] = '%';
+ }
+ }
+ auto lines = blt::string::split_sv(same, '/'); // NOTE(review): `same` still ends with the csv file name, so that segment presumably becomes part of filtered_path -- confirm
+ std::string filtered_path = "stats/";
+ blt::size_t index = 0; // position of the last segment containing '%', i.e. where the compared paths differ
+ for (const auto& [i, line] : blt::enumerate(lines))
+ {
+ if (blt::string::contains(line, '%'))
+ {
+ index = i;
+ continue; // differing segments are left out of the output directory
+ }
+ filtered_path += line;
+ filtered_path += '/';
+ }
+ auto bin_line = blt::string::split(lines[0], '-');
+ auto bin_size = std::stoi(bin_line[1]); // first segment is "bins-<N>" as built by make_path; std::stoi throws on non-numeric text and nothing in this worker catches it
+ std::filesystem::create_directories(filtered_path);
+ BLT_TRACE("Writing to path %s", filtered_path.c_str());
+
+ std::vector mans;
+ for (auto [i, pair] : blt::iterate(data.begin(), data.end()).enumerate())
+ {
+ for (const auto& [path2, vec2] : blt::iterate(data.begin(), data.end()).skip(i + 1)) // pairs each entry with the entries after it, so every unordered pair is tested once
+ mans.emplace_back(do_man_whitney(pair.first, path2, pair.second, vec2));
+ }
+
+ std::ofstream stats{filtered_path + "results_table.txt"};
+ stats << "\\begin{figure}[h!]\n\t\\centering" << std::endl;
+ stats << "\t\\makebox[\\textwidth]{\\begin{tabular}{ccc}" << std::endl << "\t\t";
+
+ for (auto [i, task] : blt::enumerate(t.tasks)) // first figure: one topological error plot per task
+ {
+ if (i != 0)
+ stats << " & \n\t\t";
+ stats << "\\includegraphics[width=0.4\\textwidth]{" << make_path(task) + "errors-topological" << bin_size << "}";
+ }
+ stats << "\\\\" << std::endl;
+ stats << "\t\\end{tabular}}" << std::endl;
+ stats << "\t\\caption{}\n\t\\label{fig:}" << std::endl;
+ stats << "\\end{figure}" << std::endl << std::endl;
+
+ stats << "\\begin{figure}[h!]\n\t\\centering" << std::endl;
+ stats << "\t\\makebox[\\textwidth]{\\begin{tabular}{ccc}" << std::endl << "\t\t";
+
+ for (auto [i, task] : blt::enumerate(t.tasks)) // second figure, first row: the same topological plots again
+ {
+ if (i != 0)
+ stats << " & \n\t\t";
+ stats << "\\includegraphics[width=0.4\\textwidth]{" << make_path(task) + "errors-topological" << bin_size << "}";
+ }
+ stats << "\\\\" << std::endl << "\t\t";
+ for (auto [i, task] : blt::enumerate(t.tasks)) // second figure, second row: quantization error plots
+ {
+ if (i != 0)
+ stats << " & \n\t\t";
+ stats << "\\includegraphics[width=0.4\\textwidth]{" << make_path(task) + "errors-quantization" << bin_size << "}";
+ }
+ stats << "\\\\" << std::endl;
+ stats << "\t\\end{tabular}}" << std::endl;
+ stats << "\t\\caption{}\n\t\\label{fig:}" << std::endl;
+ stats << "\\end{figure}" << std::endl << std::endl;
+
+ stats << "\\begin{table}[h!]\n\t\\centering" << std::endl;
+ stats <<
+ "\t\\makebox[\\textwidth]{\\begin{tabular}{||m{0.3\\linewidth}|m{0.125\\linewidth}|m{0.2\\linewidth}|m{0.2\\linewidth}|m{0.15\\linewidth}||}"
+ << std::endl;
+ stats << "\t\t\\hline" << std::endl;
+ stats << "\t\tName & Z-Value & P-Value & Effect Size & Significant\\\\" << std::endl;
+ stats << "\t\t\\hline" << std::endl;
+ for (const auto& man : mans) // one table row per pairwise comparison
+ {
+ auto lines1 = blt::string::split(man.name1, '/');
+ auto lines2 = blt::string::split(man.name2, '/');
+ const auto& name1 = lines1[index]; // the path segment in which the compared tasks differ
+ const auto& name2 = lines2[index];
+
+ auto effect = man.r < 0.3 ? "Small" : (man.r < 0.5 ? "Medium" : "Large"); // r below 0.3 is Small, below 0.5 is Medium, otherwise Large
+ constexpr Scalar acceptance_region = 1.96; // presumably the two-tailed 5% critical value of the standard normal
+ auto sig = (man.z < -acceptance_region || man.z > acceptance_region) ? "Yes" : "No";
+
+ BLT_TRACE("Z: %f P: %f", man.z, cumulativeNormal(man.z));
+ stats << "\t\t" << name1 << " \\newline " << name2 << " & " << man.z << " & " << cumulativeNormal(man.z) << " & " << man.r << " ("
+ << effect << ") & " << sig << "\\\\" << std::endl;
+ stats << "\t\t\\hline" << std::endl;
+ }
+ stats << "\t\\end{tabular}}" << std::endl;
+ stats << "\t\\caption{}\n\t\\label{tbl:}" << std::endl;
+ stats << "\\end{table}" << std::endl;
+ }
+ });
+ }
+
+ for (auto& thread : threads) // wait for every worker before returning; the lambdas reference locals of this function
+ {
+ if (thread.joinable())
+ thread.join();
+ }
}
int main(int argc, const char** argv)
diff --git a/src/som.cpp b/src/som.cpp
index 5f374c4..011de1e 100644
--- a/src/som.cpp
+++ b/src/som.cpp
@@ -48,7 +48,7 @@ namespace assign3
{
const auto v0_idx = get_closest_neuron(bins);
auto& v0 = array.get_map()[v0_idx];
- v0.update(bins, v0.dist(bins), eta);
+ // v0.update(bins, v0.dist(bins), eta);
// find the closest neighbour neuron to v0
const auto distance_min = find_closest_neighbour_distance(v0_idx);