diff --git a/CMakeLists.txt b/CMakeLists.txt index 91aecb6..142015e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.25) -project(COSC-4P80-Assignment-3 VERSION 0.0.29) +project(COSC-4P80-Assignment-3 VERSION 0.0.30) include(FetchContent) option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF) diff --git a/src/som.cpp b/src/som.cpp index 71ec6ee..71cec0e 100644 --- a/src/som.cpp +++ b/src/som.cpp @@ -26,37 +26,36 @@ namespace assign3 { - som_t::som_t(const data_file_t& file, blt::size_t width, blt::size_t height, blt::size_t max_epochs, distance_function_t* dist_func, topology_function_t* topology_function, shape_t shape, init_t init, bool normalize): - array(file.data_points.begin()->bins.size(), width, height, shape), file(file), max_epochs(max_epochs), dist_func(dist_func), - topology_function(topology_function) + array(file.data_points.begin()->bins.size(), width, height, shape), file(file), max_epochs(max_epochs), dist_func(dist_func), + topology_function(topology_function) { for (auto& v : array.get_map()) v.randomize(std::random_device{}(), init, normalize, file); compute_errors(); } - + void som_t::train_epoch(Scalar initial_learn_rate) { blt::random::random_t rand{std::random_device{}()}; std::shuffle(file.data_points.begin(), file.data_points.end(), rand); - + auto time_ratio = static_cast(current_epoch) / static_cast(max_epochs); auto eta = initial_learn_rate * std::exp(-2 * time_ratio); - + for (auto& current_data : file.data_points) { const auto v0_idx = get_closest_neuron(current_data.bins); auto& v0 = array.get_map()[v0_idx]; v0.update(current_data.bins, v0.dist(current_data.bins), eta); - + // find the closest neighbour neuron to v0 const auto distance_min = find_closest_neighbour_distance(v0_idx); // this will find the required scaling factor to make a point in the middle between v0 and its closest neighbour activate 50% // from the perspective of the gaussian function const auto scale = topology_function->scale(distance_min * 0.5f, 0.5); - + for (auto [i, n] : blt::enumerate(array.get_map())) { if (i == v0_idx) @@ -68,7 +67,7 @@ namespace assign3 current_epoch++; compute_errors(); } - + blt::size_t som_t::get_closest_neuron(const std::vector& data) { blt::size_t index = 0; @@ -84,7 +83,7 @@ namespace assign3 } return index; } - + Scalar som_t::find_closest_neighbour_distance(blt::size_t v0) { Scalar distance_min = std::numeric_limits::max(); @@ -95,102 +94,104 @@ namespace assign3 } return distance_min; } - + struct distance_data_t { Scalar data; blt::size_t index; - + distance_data_t(Scalar data, size_t index): data(data), index(index) - {} - + { + } + inline friend bool operator<(const distance_data_t& a, const distance_data_t& b) { return a.data < b.data; } - + inline friend bool operator==(const distance_data_t& a, const distance_data_t& b) { return a.data == b.data; } }; - + blt::vec2 som_t::get_topological_position(const std::vector& data) { std::vector distances; for (auto [i, d] : blt::enumerate(get_array().get_map())) distances.emplace_back(d.dist(data), i); std::sort(distances.begin(), distances.end()); - + const auto [dist_1, ni_1] = distances[0]; const auto [dist_2, ni_2] = distances[1]; const auto [dist_3, ni_3] = distances[2]; - + const float dt = dist_1 + dist_2 + dist_3; const float dp1 = dist_1 / dt; const float dp2 = dist_2 / dt; const float dp3 = dist_3 / dt; - + const auto& n_1 = array.get_map()[ni_1]; const auto& n_2 = array.get_map()[ni_2]; const auto& n_3 = array.get_map()[ni_3]; - + const auto p_1 = blt::vec2{n_1.get_x(), n_1.get_y()}; const auto p_2 = blt::vec2{n_2.get_x(), n_2.get_y()}; const auto p_3 = blt::vec2{n_3.get_x(), n_3.get_y()}; - + return (dp1 * p_1) + (dp2 * p_2) + (dp3 * p_3); } - + Scalar som_t::topological_error() { Scalar total = 0; std::vector> distances; - + for (const auto& x : file.data_points) { distances.clear(); for (const auto& [i, n] : blt::enumerate(array.get_map())) distances.emplace_back(i, n.dist(x.bins)); - + std::pair min1 = {0, std::numeric_limits::max()}; std::pair min2 = {0, std::numeric_limits::max()}; - + for (const auto& elem : distances) { if (elem.second < min1.second) { min2 = min1; min1 = elem; - } else if (elem.second < min2.second) + } + else if (elem.second < min2.second) min2 = elem; } - + // we can assert the neurons are neighbours if the distance between the BMUs and the nearest neighbour are equal. const auto min_distances = neuron_t::distance(dist_func, array.get_map()[min1.first], array.get_map()[min2.first]); auto neighbour_distances = find_closest_neighbour_distance(min1.first); - + if (!blt::f_equal(min_distances, neighbour_distances)) total += 1; } - + return total / static_cast(file.data_points.size()); } - + void som_t::compute_neuron_activations(Scalar distance, Scalar activation) { for (auto& n : array.get_map()) n.set_activation(0); - + Scalar min = std::numeric_limits::max(); Scalar max = std::numeric_limits::min(); - + for (auto [i, v] : blt::enumerate(array.get_map())) { const auto half = find_closest_neighbour_distance(i) / distance; -// auto sigma = std::sqrt(-(half * half) / (2 * std::log(requested_activation))); -// auto r = 1 / (2 * sigma * sigma); -// + // auto sigma = std::sqrt(-(half * half) / (2 * std::log(requested_activation))); + // auto r = 1 / (2 * sigma * sigma); + // const auto scale = topology_function->scale(half, activation); for (const auto& [is_bad, bins] : file.data_points) { @@ -200,7 +201,7 @@ namespace assign3 else v.activate(ds); } - + min = std::min(min, v.get_activation()); max = std::max(max, v.get_activation()); } @@ -208,28 +209,28 @@ namespace assign3 for (auto& n : array.get_map()) n.set_activation(2 * (n.get_activation() - min) / (max - min) - 1); } - + void som_t::write_activations(std::ostream& out) { out << "x,y,activation\n"; for (const auto& v : array.get_map()) out << v.get_x() << ',' << v.get_y() << ',' << v.get_activation() << '\n'; } - + void som_t::write_topology_errors(std::ostream& out) { out << "epoch,error\n"; for (auto [i, v] : blt::enumerate(topological_errors)) out << i << ',' << v << '\n'; } - + void som_t::write_quantization_errors(std::ostream& out) { out << "epoch,error\n"; for (auto [i, v] : blt::enumerate(quantization_errors)) out << i << ',' << v << '\n'; } - + void som_t::write_all_errors(std::ostream& out) { out << "epoch,topology error,quantization error\n"; @@ -239,26 +240,26 @@ namespace assign3 out << i << ',' << t << ',' << q << '\n'; } } - + Scalar som_t::quantization_error() { Scalar incorrect = 0; - + for (const auto& point : file.data_points) { const auto& nearest = array.get_map()[get_closest_neuron(point.bins)]; - + bool is_neural = nearest.get_activation() > -quantization_distance && nearest.get_activation() < quantization_distance; - + if (is_neural) { incorrect++; continue; } - + bool is_bad = nearest.get_activation() <= -quantization_distance; bool is_good = nearest.get_activation() >= quantization_distance; - + if ((is_bad && point.is_bad) || (is_good && !point.is_bad)) continue; incorrect++; @@ -266,13 +267,11 @@ namespace assign3 return incorrect; } - + void som_t::compute_errors() { compute_neuron_activations(); topological_errors.push_back(topological_error()); quantization_errors.push_back(quantization_error()); } - - -} \ No newline at end of file +}