COSC-4P80-Assignment-2/include/assign2/layer.h

213 lines
7.5 KiB
C
Raw Normal View History

2024-10-21 16:42:03 -04:00
#pragma once
/*
* Copyright (C) 2024 Brett Terpstra
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef COSC_4P80_ASSIGNMENT_2_LAYER_H
#define COSC_4P80_ASSIGNMENT_2_LAYER_H
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <variant>
#include <vector>
#include <blt/std/types.h>
#include <assign2/initializers.h>
#include "blt/iterator/zip.h"
#include "blt/iterator/iterator.h"
2024-10-21 16:42:03 -04:00
namespace assign2
{
2024-10-23 01:51:32 -04:00
class neuron_t
2024-10-21 16:42:03 -04:00
{
2024-10-25 01:22:32 -04:00
friend layer_t;
2024-10-21 16:42:03 -04:00
public:
2024-10-23 01:51:32 -04:00
// empty neuron for loading from a stream
2024-10-25 14:01:47 -04:00
explicit neuron_t(weight_view weights, weight_view dw): dw(dw), weights(weights)
2024-10-23 01:51:32 -04:00
{}
// neuron with bias
2024-10-25 14:01:47 -04:00
explicit neuron_t(weight_view weights, weight_view dw, Scalar bias): bias(bias), dw(dw), weights(weights)
2024-10-21 16:42:03 -04:00
{}
2024-10-25 01:22:32 -04:00
Scalar activate(const Scalar* inputs, function_t* act_func)
2024-10-21 19:25:00 -04:00
{
2024-10-25 01:22:32 -04:00
z = bias;
2024-10-23 01:51:32 -04:00
for (auto [x, w] : blt::zip_iterator_container({inputs, inputs + weights.size()}, {weights.begin(), weights.end()}))
2024-10-25 01:22:32 -04:00
z += x * w;
a = act_func->call(z);
return a;
2024-10-21 19:25:00 -04:00
}
2024-10-25 14:01:47 -04:00
void back_prop(function_t* act, const std::vector<Scalar>& previous_outputs, Scalar next_error)
{
// delta for weights
error = act->derivative(z) * next_error;
for (auto [prev_out, d_weight] : blt::zip(previous_outputs, dw))
{
// dw / apply dw
d_weight = learn_rate * prev_out * error;
}
}
void update()
{
for (auto [w, d] : blt::in_pairs(weights, dw))
w += d;
}
2024-10-23 01:51:32 -04:00
template<typename OStream>
OStream& serialize(OStream& stream)
2024-10-21 16:42:03 -04:00
{
2024-10-23 01:51:32 -04:00
stream << bias;
for (auto d : weights)
stream << d;
}
template<typename IStream>
IStream& deserialize(IStream& stream)
{
for (auto& d : blt::iterate(weights).rev())
stream >> d;
stream >> bias;
2024-10-21 16:42:03 -04:00
}
2024-10-25 01:22:32 -04:00
void debug() const
{
std::cout << bias << " ";
}
2024-10-21 16:42:03 -04:00
2024-10-23 01:51:32 -04:00
private:
2024-10-25 01:22:32 -04:00
float z = 0;
float a = 0;
float bias = 0;
float error = 0;
2024-10-25 14:01:47 -04:00
weight_view dw;
2024-10-23 01:51:32 -04:00
weight_view weights;
};
class layer_t
{
2024-10-25 14:01:47 -04:00
friend network_t;
2024-10-23 01:51:32 -04:00
public:
template<typename WeightFunc, typename BiasFunc>
2024-10-25 01:22:32 -04:00
layer_t(const blt::i32 in, const blt::i32 out, function_t* act_func, WeightFunc w, BiasFunc b):
in_size(in), out_size(out), act_func(act_func)
2024-10-23 01:51:32 -04:00
{
neurons.reserve(out_size);
for (blt::i32 i = 0; i < out_size; i++)
{
auto weight = weights.allocate_view(in_size);
2024-10-25 14:01:47 -04:00
auto dw = weight_derivatives.allocate_view(in_size);
2024-10-23 01:51:32 -04:00
for (auto& v : weight)
v = w(i);
2024-10-25 14:01:47 -04:00
neurons.push_back(neuron_t{weight, dw, b(i)});
2024-10-23 01:51:32 -04:00
}
}
2024-10-25 14:01:47 -04:00
const std::vector<Scalar>& call(const std::vector<Scalar>& in)
2024-10-23 01:51:32 -04:00
{
2024-10-25 14:01:47 -04:00
outputs.clear();
outputs.reserve(out_size);
2024-10-23 01:51:32 -04:00
#if BLT_DEBUG_LEVEL > 0
if (in.size() != in_size)
throw std::runtime_exception("Input vector doesn't match expected input size!");
#endif
for (auto& n : neurons)
2024-10-25 14:01:47 -04:00
outputs.push_back(n.activate(in.data(), act_func));
return outputs;
2024-10-23 01:51:32 -04:00
}
2024-10-25 14:01:47 -04:00
Scalar back_prop(const std::vector<Scalar>& prev_layer_output,
const std::variant<blt::ref<const std::vector<Scalar>>, blt::ref<const layer_t>>& data)
2024-10-25 01:22:32 -04:00
{
2024-10-25 14:01:47 -04:00
return std::visit(blt::lambda_visitor{
// is provided if we are an output layer, contains output of this net (per neuron) and the expected output (per neuron)
[this, &prev_layer_output](const std::vector<Scalar>& expected) {
Scalar total_error = 0;
for (auto [i, n] : blt::enumerate(neurons))
{
auto d = outputs[i] - expected[i];
auto d2 = 0.5f * (d * d);
total_error += d2;
n.back_prop(act_func, prev_layer_output, d2);
}
return total_error;
},
// interior layer
[this, &prev_layer_output](const layer_t& layer) {
Scalar total_error = 0;
for (auto [i, n] : blt::enumerate(neurons))
{
Scalar weight_error = 0;
// TODO: this is not efficient on the cache!
for (auto nn : layer.neurons)
weight_error += nn.error * nn.weights[i];
Scalar w2 = 0.5f * weight_error * weight_error;
total_error += w2;
n.back_prop(act_func, prev_layer_output, w2);
}
return total_error;
}
}, data);
}
void update()
{
for (auto& n : neurons)
n.update();
2024-10-25 01:22:32 -04:00
}
2024-10-23 01:51:32 -04:00
template<typename OStream>
OStream& serialize(OStream& stream)
{
for (auto d : neurons)
stream << d;
}
template<typename IStream>
IStream& deserialize(IStream& stream)
{
for (auto& d : blt::iterate(neurons).rev())
stream >> d;
}
[[nodiscard]] inline blt::i32 get_in_size() const
{
return in_size;
}
[[nodiscard]] inline blt::i32 get_out_size() const
{
return out_size;
}
2024-10-25 01:22:32 -04:00
void debug() const
{
std::cout << "Bias: ";
for (auto& v : neurons)
v.debug();
std::cout << std::endl;
weights.debug();
}
2024-10-21 16:42:03 -04:00
private:
const blt::i32 in_size, out_size;
2024-10-23 01:51:32 -04:00
weight_t weights;
2024-10-25 14:01:47 -04:00
weight_t weight_derivatives;
2024-10-25 01:22:32 -04:00
function_t* act_func;
2024-10-23 01:51:32 -04:00
std::vector<neuron_t> neurons;
2024-10-25 14:01:47 -04:00
std::vector<Scalar> outputs;
2024-10-21 16:42:03 -04:00
};
}
#endif //COSC_4P80_ASSIGNMENT_2_LAYER_H