COSC-4P80-Assignment-2/include/assign2/network.h

182 lines
7.2 KiB
C
Raw Normal View History

2024-10-21 19:25:00 -04:00
#pragma once
/*
* Copyright (C) 2024 Brett Terpstra
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef COSC_4P80_ASSIGNMENT_2_NETWORK_H
#define COSC_4P80_ASSIGNMENT_2_NETWORK_H
#include <assign2/common.h>
#include <assign2/layer.h>
2024-10-25 01:22:32 -04:00
#include "blt/std/assert.h"
2024-10-27 18:09:37 -04:00
#include "global_magic.h"
2024-10-21 19:25:00 -04:00
namespace assign2
{
class network_t
{
public:
2024-10-23 01:51:32 -04:00
template<typename WeightFunc, typename BiasFunc>
2024-10-25 14:01:47 -04:00
network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size, WeightFunc w, BiasFunc b)
2024-10-21 19:25:00 -04:00
{
2024-10-23 01:51:32 -04:00
if (layer_count > 0)
2024-10-21 19:25:00 -04:00
{
2024-10-23 01:51:32 -04:00
for (blt::i32 i = 0; i < layer_count; i++)
{
if (i == 0)
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(input_size, hidden_size, w, b));
2024-10-23 01:51:32 -04:00
else
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(hidden_size, hidden_size, w, b));
2024-10-23 01:51:32 -04:00
}
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(hidden_size, output_size, w, b));
2024-10-21 19:25:00 -04:00
} else
2024-10-23 01:51:32 -04:00
{
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(input_size, output_size, w, b));
2024-10-23 01:51:32 -04:00
}
}
template<typename WeightFunc, typename BiasFunc, typename OutputWeightFunc, typename OutputBiasFunc>
network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size,
2024-10-25 14:01:47 -04:00
WeightFunc w, BiasFunc b, OutputWeightFunc ow, OutputBiasFunc ob)
2024-10-23 01:51:32 -04:00
{
if (layer_count > 0)
{
for (blt::i32 i = 0; i < layer_count; i++)
{
if (i == 0)
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(input_size, hidden_size, w, b));
2024-10-23 01:51:32 -04:00
else
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(hidden_size, hidden_size, w, b));
2024-10-23 01:51:32 -04:00
}
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(hidden_size, output_size, ow, ob));
2024-10-23 01:51:32 -04:00
} else
{
2024-10-27 18:09:37 -04:00
layers.push_back(std::make_unique<layer_t>(input_size, output_size, ow, ob));
2024-10-23 01:51:32 -04:00
}
}
2024-10-27 18:09:37 -04:00
explicit network_t(std::vector<std::unique_ptr<layer_t>> layers): layers(std::move(layers))
2024-10-23 01:51:32 -04:00
{}
network_t() = default;
2024-10-25 14:01:47 -04:00
const std::vector<Scalar>& execute(const std::vector<Scalar>& input)
2024-10-23 01:51:32 -04:00
{
2024-10-25 14:01:47 -04:00
std::vector<blt::ref<const std::vector<Scalar>>> outputs;
outputs.emplace_back(input);
2024-10-23 01:51:32 -04:00
2024-10-28 01:55:13 -04:00
for (auto [i, v] : blt::enumerate(layers))
2024-10-27 18:09:37 -04:00
outputs.emplace_back(v->call(outputs.back()));
2024-10-23 01:51:32 -04:00
2024-10-25 14:01:47 -04:00
return outputs.back();
2024-10-23 01:51:32 -04:00
}
2024-10-28 01:55:13 -04:00
error_data_t error(const data_file_t& data)
2024-10-25 01:22:32 -04:00
{
2024-10-23 01:51:32 -04:00
Scalar total_error = 0;
2024-10-25 01:22:32 -04:00
Scalar total_d_error = 0;
2024-10-28 01:55:13 -04:00
for (auto& d : data.data_points)
2024-10-23 01:51:32 -04:00
{
2024-10-28 01:55:13 -04:00
std::vector<Scalar> expected{d.is_bad ? 0.0f : 1.0f, d.is_bad ? 1.0f : 0.0f};
auto out = execute(d.bins);
2024-10-25 14:01:47 -04:00
2024-10-28 01:55:13 -04:00
BLT_ASSERT(out.size() == expected.size());
for (auto [o, e] : blt::in_pairs(out, expected))
2024-10-25 14:01:47 -04:00
{
2024-10-28 01:55:13 -04:00
auto d_error = o - e;
2024-10-28 23:12:19 -04:00
2024-10-28 01:55:13 -04:00
auto error = 0.5f * (d_error * d_error);
2024-10-28 23:12:19 -04:00
total_error += error;
total_d_error += d_error;
2024-10-25 14:01:47 -04:00
}
2024-10-23 01:51:32 -04:00
}
2024-10-25 01:22:32 -04:00
2024-10-28 01:55:13 -04:00
return {total_error / static_cast<Scalar>(data.data_points.size()), total_d_error / static_cast<Scalar>(data.data_points.size())};
}
2024-10-28 23:12:19 -04:00
error_data_t train(const data_t& data, bool reset)
2024-10-28 01:55:13 -04:00
{
error_data_t error = {0, 0};
execute(data.bins);
std::vector<Scalar> expected{data.is_bad ? 0.0f : 1.0f, data.is_bad ? 1.0f : 0.0f};
for (auto [i, layer] : blt::iterate(layers).enumerate().rev())
{
if (i == layers.size() - 1)
{
error += layer->back_prop(layers[i - 1]->outputs, expected);
} else if (i == 0)
{
error += layer->back_prop(data.bins, *layers[i + 1]);
} else
{
error += layer->back_prop(layers[i - 1]->outputs, *layers[i + 1]);
}
}
for (auto& l : layers)
2024-10-28 23:12:19 -04:00
l->update(m_omega, reset);
// BLT_TRACE("Error for input: %f, derr: %f", error.error, error.d_error);
2024-10-28 01:55:13 -04:00
return error;
}
2024-10-28 23:12:19 -04:00
error_data_t train_epoch(const data_file_t& example, blt::i32 trains_per_data = 1)
2024-10-28 01:55:13 -04:00
{
2024-10-28 23:12:19 -04:00
error_data_t error{0, 0};
2024-10-28 01:55:13 -04:00
for (const auto& x : example.data_points)
2024-10-28 23:12:19 -04:00
{
for (blt::i32 i = 0; i < trains_per_data; i++)
error += train(x, reset_next);
}
// take the average cost over all the training.
error.d_error /= static_cast<Scalar>(example.data_points.size() * trains_per_data);
error.error /= static_cast<Scalar>(example.data_points.size() * trains_per_data);
// as long as we are reducing error in the same direction in overall terms, we should still build momentum.
auto last_sign = last_d_error >= 0;
auto cur_sign = error.d_error >= 0;
last_d_error = error.d_error;
reset_next = last_sign != cur_sign;
2024-10-28 01:55:13 -04:00
return error;
2024-10-21 19:25:00 -04:00
}
2024-10-28 23:12:19 -04:00
void with_momentum(Scalar* omega)
{
m_omega = omega;
}
2024-10-27 18:09:37 -04:00
#ifdef BLT_USE_GRAPHICS
2024-10-28 01:55:13 -04:00
void render(blt::gfx::batch_renderer_2d& renderer) const
2024-10-27 18:09:37 -04:00
{
for (auto& l : layers)
2024-10-28 01:55:13 -04:00
l->render(renderer);
2024-10-27 18:09:37 -04:00
}
#endif
2024-10-21 19:25:00 -04:00
private:
2024-10-28 23:12:19 -04:00
// pointer so it can be changed from the UI
Scalar* m_omega = nullptr;
Scalar last_d_error = 0;
bool reset_next = false;
2024-10-27 18:09:37 -04:00
std::vector<std::unique_ptr<layer_t>> layers;
2024-10-21 19:25:00 -04:00
};
}
#endif //COSC_4P80_ASSIGNMENT_2_NETWORK_H