idk what changed

parent f5c79b5bb4
commit bd9d08b4ea
CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.25)
-project(COSC-4P80-Assignment-3 VERSION 0.0.29)
+project(COSC-4P80-Assignment-3 VERSION 0.0.30)
 include(FetchContent)
 
 option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)

src/som.cpp (99 changed lines)
@@ -26,37 +26,36 @@
 
 namespace assign3
 {
 
     som_t::som_t(const data_file_t& file, blt::size_t width, blt::size_t height, blt::size_t max_epochs, distance_function_t* dist_func,
                  topology_function_t* topology_function, shape_t shape, init_t init, bool normalize):
         array(file.data_points.begin()->bins.size(), width, height, shape), file(file), max_epochs(max_epochs), dist_func(dist_func),
         topology_function(topology_function)
     {
         for (auto& v : array.get_map())
             v.randomize(std::random_device{}(), init, normalize, file);
         compute_errors();
     }
 
     void som_t::train_epoch(Scalar initial_learn_rate)
     {
         blt::random::random_t rand{std::random_device{}()};
         std::shuffle(file.data_points.begin(), file.data_points.end(), rand);
 
         auto time_ratio = static_cast<Scalar>(current_epoch) / static_cast<Scalar>(max_epochs);
         auto eta = initial_learn_rate * std::exp(-2 * time_ratio);
 
         for (auto& current_data : file.data_points)
         {
             const auto v0_idx = get_closest_neuron(current_data.bins);
             auto& v0 = array.get_map()[v0_idx];
             v0.update(current_data.bins, v0.dist(current_data.bins), eta);
 
             // find the closest neighbour neuron to v0
             const auto distance_min = find_closest_neighbour_distance(v0_idx);
             // this will find the required scaling factor to make a point in the middle between v0 and its closest neighbour activate 50%
             // from the perspective of the gaussian function
             const auto scale = topology_function->scale(distance_min * 0.5f, 0.5);
 
             for (auto [i, n] : blt::enumerate(array.get_map()))
             {
                 if (i == v0_idx)
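
A note on the two formulas in train_epoch above: the learning rate follows eta(t) = initial_learn_rate * exp(-2 * t / max_epochs), so it starts at the initial value and decays to roughly 13.5% of it by the final epoch, and the scale passed to the topology function appears to be the factor r of a Gaussian neighbourhood h(d) = exp(-r * d^2) chosen so that the midpoint between v0 and its closest neighbour activates at 0.5; solving exp(-r * (d/2)^2) = 0.5 gives r = ln(2) / (d/2)^2. The sketch below illustrates both; gaussian_scale is a hypothetical stand-in for topology_function_t::scale, inferred from the commented-out sigma/r lines in compute_neuron_activations rather than taken from the real implementation.

    #include <cmath>
    #include <cstddef>

    // Hypothetical stand-in for topology_function_t::scale: solve
    // exp(-r * d^2) = activation for r (activation = 0.5 yields ln(2) / d^2).
    inline float gaussian_scale(float d, float activation)
    {
        return -std::log(activation) / (d * d);
    }

    // The exponential learning-rate schedule used by train_epoch.
    inline float decayed_eta(float initial_learn_rate, std::size_t epoch, std::size_t max_epochs)
    {
        const auto t = static_cast<float>(epoch) / static_cast<float>(max_epochs);
        return initial_learn_rate * std::exp(-2.0f * t);
    }
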
@@ -68,7 +67,7 @@ namespace assign3
         current_epoch++;
         compute_errors();
     }
 
     blt::size_t som_t::get_closest_neuron(const std::vector<Scalar>& data)
     {
         blt::size_t index = 0;
@@ -84,7 +83,7 @@ namespace assign3
         }
         return index;
     }
 
     Scalar som_t::find_closest_neighbour_distance(blt::size_t v0)
     {
         Scalar distance_min = std::numeric_limits<Scalar>::max();
@@ -95,102 +94,104 @@ namespace assign3
         }
         return distance_min;
     }
 
     struct distance_data_t
     {
         Scalar data;
         blt::size_t index;
 
         distance_data_t(Scalar data, size_t index): data(data), index(index)
-        {}
+        {
+        }
 
         inline friend bool operator<(const distance_data_t& a, const distance_data_t& b)
         {
             return a.data < b.data;
         }
 
         inline friend bool operator==(const distance_data_t& a, const distance_data_t& b)
         {
             return a.data == b.data;
         }
     };
 
     blt::vec2 som_t::get_topological_position(const std::vector<Scalar>& data)
     {
         std::vector<distance_data_t> distances;
         for (auto [i, d] : blt::enumerate(get_array().get_map()))
             distances.emplace_back(d.dist(data), i);
         std::sort(distances.begin(), distances.end());
 
         const auto [dist_1, ni_1] = distances[0];
         const auto [dist_2, ni_2] = distances[1];
         const auto [dist_3, ni_3] = distances[2];
 
         const float dt = dist_1 + dist_2 + dist_3;
         const float dp1 = dist_1 / dt;
         const float dp2 = dist_2 / dt;
         const float dp3 = dist_3 / dt;
 
         const auto& n_1 = array.get_map()[ni_1];
         const auto& n_2 = array.get_map()[ni_2];
         const auto& n_3 = array.get_map()[ni_3];
 
         const auto p_1 = blt::vec2{n_1.get_x(), n_1.get_y()};
         const auto p_2 = blt::vec2{n_2.get_x(), n_2.get_y()};
         const auto p_3 = blt::vec2{n_3.get_x(), n_3.get_y()};
 
         return (dp1 * p_1) + (dp2 * p_2) + (dp3 * p_3);
     }
 
     Scalar som_t::topological_error()
     {
         Scalar total = 0;
         std::vector<std::pair<blt::size_t, Scalar>> distances;
 
         for (const auto& x : file.data_points)
         {
             distances.clear();
             for (const auto& [i, n] : blt::enumerate(array.get_map()))
                 distances.emplace_back(i, n.dist(x.bins));
 
             std::pair<blt::size_t, Scalar> min1 = {0, std::numeric_limits<Scalar>::max()};
             std::pair<blt::size_t, Scalar> min2 = {0, std::numeric_limits<Scalar>::max()};
 
             for (const auto& elem : distances)
             {
                 if (elem.second < min1.second)
                 {
                     min2 = min1;
                     min1 = elem;
-                } else if (elem.second < min2.second)
+                }
+                else if (elem.second < min2.second)
                     min2 = elem;
             }
 
             // we can assert the neurons are neighbours if the distance between the BMUs and the nearest neighbour are equal.
             const auto min_distances = neuron_t::distance(dist_func, array.get_map()[min1.first], array.get_map()[min2.first]);
             auto neighbour_distances = find_closest_neighbour_distance(min1.first);
 
             if (!blt::f_equal(min_distances, neighbour_distances))
                 total += 1;
         }
 
         return total / static_cast<Scalar>(file.data_points.size());
     }
 
     void som_t::compute_neuron_activations(Scalar distance, Scalar activation)
     {
         for (auto& n : array.get_map())
             n.set_activation(0);
 
         Scalar min = std::numeric_limits<Scalar>::max();
         Scalar max = std::numeric_limits<Scalar>::min();
 
         for (auto [i, v] : blt::enumerate(array.get_map()))
         {
             const auto half = find_closest_neighbour_distance(i) / distance;
             // auto sigma = std::sqrt(-(half * half) / (2 * std::log(requested_activation)));
             // auto r = 1 / (2 * sigma * sigma);
             //
             const auto scale = topology_function->scale(half, activation);
             for (const auto& [is_bad, bins] : file.data_points)
             {
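
For a concrete picture of get_topological_position: the weights dp1, dp2 and dp3 are the raw distances divided by their sum, so they add to 1 and the returned point is a convex combination of the three closest neurons' grid positions, with the weight growing with distance (distances of 1, 2 and 3 give weights 1/6, 1/3 and 1/2). A minimal sketch of the same blend, using a hypothetical vec2 in place of blt::vec2:

    // Hypothetical 2D point type standing in for blt::vec2.
    struct vec2 { float x, y; };

    // Mirrors the weighting in som_t::get_topological_position, given the three
    // closest neurons' distances (sorted ascending) and grid positions.
    vec2 blend_position(const float (&dist)[3], const vec2 (&pos)[3])
    {
        const float dt = dist[0] + dist[1] + dist[2];
        vec2 out{0.0f, 0.0f};
        for (int i = 0; i < 3; ++i)
        {
            const float w = dist[i] / dt; // the three weights sum to 1
            out.x += w * pos[i].x;
            out.y += w * pos[i].y;
        }
        return out;
    }
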
@@ -200,7 +201,7 @@ namespace assign3
                 else
                     v.activate(ds);
             }
 
             min = std::min(min, v.get_activation());
             max = std::max(max, v.get_activation());
         }
@@ -208,28 +209,28 @@ namespace assign3
         for (auto& n : array.get_map())
             n.set_activation(2 * (n.get_activation() - min) / (max - min) - 1);
     }
 
     void som_t::write_activations(std::ostream& out)
     {
         out << "x,y,activation\n";
         for (const auto& v : array.get_map())
             out << v.get_x() << ',' << v.get_y() << ',' << v.get_activation() << '\n';
     }
 
     void som_t::write_topology_errors(std::ostream& out)
     {
         out << "epoch,error\n";
         for (auto [i, v] : blt::enumerate(topological_errors))
             out << i << ',' << v << '\n';
     }
 
     void som_t::write_quantization_errors(std::ostream& out)
     {
         out << "epoch,error\n";
         for (auto [i, v] : blt::enumerate(quantization_errors))
             out << i << ',' << v << '\n';
     }
 
     void som_t::write_all_errors(std::ostream& out)
     {
         out << "epoch,topology error,quantization error\n";
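
As a quick check of the rescaling at the start of this hunk: 2 * (a - min) / (max - min) - 1 maps the observed activation range [min, max] onto [-1, 1], so with min = -0.2 and max = 0.8, an activation of 0.8 becomes 1, -0.2 becomes -1, and 0.3 lands exactly on 0.
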
@@ -239,26 +240,26 @@ namespace assign3
             out << i << ',' << t << ',' << q << '\n';
         }
     }
 
     Scalar som_t::quantization_error()
     {
         Scalar incorrect = 0;
 
         for (const auto& point : file.data_points)
         {
             const auto& nearest = array.get_map()[get_closest_neuron(point.bins)];
 
             bool is_neural = nearest.get_activation() > -quantization_distance && nearest.get_activation() < quantization_distance;
 
             if (is_neural)
             {
                 incorrect++;
                 continue;
             }
 
             bool is_bad = nearest.get_activation() <= -quantization_distance;
             bool is_good = nearest.get_activation() >= quantization_distance;
 
             if ((is_bad && point.is_bad) || (is_good && !point.is_bad))
                 continue;
             incorrect++;
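
In words, quantization_error uses the BMU's normalized activation as a three-way classifier around the quantization_distance threshold (taking 0.5 purely as an illustrative value): anything strictly inside (-0.5, 0.5) is treated as undecided and counted as incorrect, an activation at or below -0.5 predicts a bad sample and one at or above 0.5 a good one, and a prediction that disagrees with point.is_bad is also counted as incorrect.
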
@@ -266,13 +267,11 @@
 
         return incorrect;
     }
 
     void som_t::compute_errors()
     {
         compute_neuron_activations();
         topological_errors.push_back(topological_error());
         quantization_errors.push_back(quantization_error());
     }
-
-
 }