diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index ebc05d3..8bc3e1c 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -4,6 +4,10 @@
+
+
+
+
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 073ccc3..d60ed54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.25)
-project(COSC-4P80-Assignment-2 VERSION 0.0.5)
+project(COSC-4P80-Assignment-2 VERSION 0.0.6)
option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)
option(ENABLE_UBSAN "Enable the ub sanitizer" OFF)
diff --git a/commit.py b/commit.py
index fb9d5cc..8131892 100755
--- a/commit.py
+++ b/commit.py
@@ -25,7 +25,7 @@ ENVIRONMENT_DATA_LOCATION = USER_HOME / ".brett_scripts.env"
if sys.platform.startswith("win"):
CONFIG_FILE_DIRECTORY = Path(os.getenv('APPDATA') + "\BLT")
- CONFIG_FILE_LOCATION = Path(CONFIG_FILE_DIRECTORY + "\commit_config.env")
+ CONFIG_FILE_LOCATION = CONFIG_FILE_DIRECTORY / "commit_config.json"
else:
XDG_CONFIG_HOME = os.environ.get('XDG_CONFIG_HOME')
if XDG_CONFIG_HOME is None:
@@ -36,7 +36,7 @@ else:
if len(str(XDG_CONFIG_HOME)) == 0:
XDG_CONFIG_HOME = USER_HOME
CONFIG_FILE_DIRECTORY = XDG_CONFIG_HOME / "blt"
- CONFIG_FILE_LOCATION = CONFIG_FILE_DIRECTORY / "commit_config.env"
+ CONFIG_FILE_LOCATION = CONFIG_FILE_DIRECTORY / "commit_config.json"
class Config:
def __init__(self):
diff --git a/include/assign2/layer.h b/include/assign2/layer.h
index 03ec2d4..43c84ef 100644
--- a/include/assign2/layer.h
+++ b/include/assign2/layer.h
@@ -31,11 +31,11 @@ namespace assign2
friend layer_t;
public:
// empty neuron for loading from a stream
- explicit neuron_t(weight_view weights): weights(weights)
+ explicit neuron_t(weight_view weights, weight_view dw): dw(dw), weights(weights)
{}
// neuron with bias
- explicit neuron_t(weight_view weights, Scalar bias): bias(bias), weights(weights)
+ explicit neuron_t(weight_view weights, weight_view dw, Scalar bias): bias(bias), dw(dw), weights(weights)
{}
Scalar activate(const Scalar* inputs, function_t* act_func)
@@ -47,6 +47,23 @@ namespace assign2
return a;
}
+ void back_prop(function_t* act, const std::vector<Scalar>& previous_outputs, Scalar next_error)
+ {
+ // δ(h) = f'(z) * error term propagated from the layer above (delta rule)
+ error = act->derivative(z) * next_error;
+ for (auto [prev_out, d_weight] : blt::zip(previous_outputs, dw))
+ {
+ // store Δw = learn_rate * input * δ; applied later by update()
+ d_weight = learn_rate * prev_out * error;
+ }
+ }
+
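+ // applies the stored deltas; kept separate from back_prop so the whole backward pass finishes before any weights change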
+ void update()
+ {
+ for (auto [w, d] : blt::in_pairs(weights, dw))
+ w += d;
+ }
+
template<typename OStream>
OStream& serialize(OStream& stream)
{
@@ -73,11 +90,13 @@ namespace assign2
float a = 0;
float bias = 0;
float error = 0;
+ weight_view dw;
weight_view weights;
};
class layer_t
{
+ friend network_t;
public:
template<typename WeightFunc, typename BiasFunc>
layer_t(const blt::i32 in, const blt::i32 out, function_t* act_func, WeightFunc w, BiasFunc b):
@@ -87,62 +106,64 @@ namespace assign2
for (blt::i32 i = 0; i < out_size; i++)
{
auto weight = weights.allocate_view(in_size);
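+ // parallel view holding this neuron's per-weight deltas, written during back_prop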
+ auto dw = weight_derivatives.allocate_view(in_size);
for (auto& v : weight)
v = w(i);
- neurons.push_back(neuron_t{weight, b(i)});
+ neurons.push_back(neuron_t{weight, dw, b(i)});
}
}
- std::vector<Scalar> call(const std::vector<Scalar>& in)
+ const std::vector<Scalar>& call(const std::vector<Scalar>& in)
{
- std::vector<Scalar> out;
- out.reserve(out_size);
+ outputs.clear();
+ outputs.reserve(out_size);
#if BLT_DEBUG_LEVEL > 0
if (in.size() != in_size)
throw std::runtime_error("Input vector doesn't match expected input size!");
#endif
for (auto& n : neurons)
- out.push_back(n.activate(in.data(), act_func));
- return out;
+ outputs.push_back(n.activate(in.data(), act_func));
+ return outputs;
}
- Scalar back_prop(const std::vector<Scalar>& prev_layer_output, Scalar error, const layer_t& next_layer, bool is_output)
+ Scalar back_prop(const std::vector<Scalar>& prev_layer_output,
+ const std::variant<blt::ref<const std::vector<Scalar>>, blt::ref<const layer_t>>& data)
{
- std::vector<Scalar> dw;
-
-
- // this is close! i think the changes should be applied in the neuron since the slides show the change of weight PER NEURON PER INPUT
- // δ(h)
- if (is_output)
- {
- // assign error to output layer
- for (auto& n : neurons)
- n.error = act_func->derivative(n.z) * error; // f'act(net(h)) * (error)
- } else
- {
- // first calculate and assign input layer error
- std::vector<Scalar> next_error;
- next_error.resize(next_layer.neurons.size());
- for (const auto& [i, w] : blt::enumerate(next_layer.neurons))
- {
- for (auto wv : w.weights)
- next_error[i] += w.error * wv;
- // needed?
- next_error[i] /= static_cast<Scalar>(w.weights.size());
- }
-
- for (auto& n : neurons)
- {
- n.error = act_func->derivative(n.z);
- }
- }
-
- for (const auto& v : prev_layer_output)
- {
-
- }
-
- return error_at_current_layer;
+ return std::visit(blt::lambda_visitor{
+ // provided if we are the output layer; contains the expected output of the network (per neuron)
+ [this, &prev_layer_output](const std::vector<Scalar>& expected) {
+ Scalar total_error = 0;
+ for (auto [i, n] : blt::enumerate(neurons))
+ {
+ auto d = outputs[i] - expected[i];
+ auto d2 = 0.5f * (d * d);
+ total_error += d2;
+ n.back_prop(act_func, prev_layer_output, d2);
+ }
+ return total_error;
+ },
+ // interior layer
+ [this, &prev_layer_output](const layer_t& layer) {
+ Scalar total_error = 0;
+ for (auto [i, n] : blt::enumerate(neurons))
+ {
+ Scalar weight_error = 0;
+ // TODO: this is not efficient on the cache!
+ for (const auto& nn : layer.neurons)
+ weight_error += nn.error * nn.weights[i];
+ Scalar w2 = 0.5f * weight_error * weight_error;
+ total_error += w2;
+ n.back_prop(act_func, prev_layer_output, w2);
+ }
+ return total_error;
+ }
+ }, data);
+ }
+
+ void update()
+ {
+ for (auto& n : neurons)
+ n.update();
}
template<typename OStream>
@@ -181,8 +202,10 @@ namespace assign2
private:
const blt::i32 in_size, out_size;
weight_t weights;
+ weight_t weight_derivatives;
function_t* act_func;
std::vector<neuron_t> neurons;
+ std::vector<Scalar> outputs;
};
}
diff --git a/include/assign2/network.h b/include/assign2/network.h
index dd739d7..da81efb 100644
--- a/include/assign2/network.h
+++ b/include/assign2/network.h
@@ -29,8 +29,7 @@ namespace assign2
{
public:
template<typename WeightFunc, typename BiasFunc>
- network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size, WeightFunc w, BiasFunc b):
- input_size(input_size), output_size(output_size), hidden_count(layer_count), hidden_size(hidden_size)
+ network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size, WeightFunc w, BiasFunc b)
{
if (layer_count > 0)
{
@@ -50,8 +49,7 @@ namespace assign2
template<typename WeightFunc, typename BiasFunc, typename OutputWeightFunc, typename OutputBiasFunc>
network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size,
- WeightFunc w, BiasFunc b, OutputWeightFunc ow, OutputBiasFunc ob):
- input_size(input_size), output_size(output_size), hidden_count(layer_count), hidden_size(hidden_size)
+ WeightFunc w, BiasFunc b, OutputWeightFunc ow, OutputBiasFunc ob)
{
if (layer_count > 0)
{
@@ -69,28 +67,20 @@ namespace assign2
}
}
- explicit network_t(std::vector<layer_t> layers):
- input_size(layers.begin()->get_in_size()), output_size(layers.end()->get_out_size()),
- hidden_count(static_cast(layers.size()) - 1), hidden_size(layers.end()->get_in_size()), layers(std::move(layers))
+ explicit network_t(std::vector<layer_t> layers): layers(std::move(layers))
{}
network_t() = default;
- std::vector<Scalar> execute(const std::vector<Scalar>& input)
+ const std::vector<Scalar>& execute(const std::vector<Scalar>& input)
{
- std::vector<Scalar> previous_output;
- std::vector<Scalar> current_output;
+ std::vector<blt::ref<const std::vector<Scalar>>> outputs;
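+ // holds a reference to each layer's cached output vector; the layers own the storage, so the reference returned below stays valid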
+ outputs.emplace_back(input);
- for (auto [i, v] : blt::enumerate(layers))
- {
- previous_output = current_output;
- if (i == 0)
- current_output = v.call(input);
- else
- current_output = v.call(previous_output);
- }
+ for (auto& v : layers)
+ outputs.emplace_back(v.call(outputs.back()));
- return current_output;
+ return outputs.back();
}
std::pair<Scalar, Scalar> error(const std::vector<Scalar>& outputs, bool is_bad)
@@ -108,19 +98,33 @@ namespace assign2
return {0.5f * (error * error), error};
}
- Scalar train(const data_file_t& example)
+ Scalar train_epoch(const data_file_t& example)
{
Scalar total_error = 0;
Scalar total_d_error = 0;
for (const auto& x : example.data_points)
{
- print_vec(x.bins) << std::endl;
- auto o = execute(x.bins);
- print_vec(o) << std::endl;
- auto [e, de] = error(o, x.is_bad);
- total_error += e;
- total_d_error += -learn_rate * de;
- BLT_TRACE("\tError %f, %f, is bad? %s", e, -learn_rate * de, x.is_bad ? "True" : "False");
+ execute(x.bins);
+ std::vector<Scalar> expected{x.is_bad ? 0.0f : 1.0f, x.is_bad ? 1.0f : 0.0f};
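+ // one-hot target: {1, 0} for a good sample, {0, 1} for a bad one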
+
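+ // walk the layers in reverse, feeding each layer's error back to the one before it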
+ for (auto [i, layer] : blt::iterate(layers).enumerate().rev())
+ {
+ if (i == layers.size() - 1)
+ {
+ auto e = layer.back_prop(layers[i - 1].outputs, expected);
+ total_error += e;
+ } else if (i == 0)
+ {
+ auto e = layer.back_prop(x.bins, layers[i + 1]);
+ total_error += e;
+ } else
+ {
+ auto e = layer.back_prop(layers[i - 1].outputs, layers[i + 1]);
+ total_error += e;
+ }
+ }
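+ // apply the accumulated weight deltas only once the full backward pass for this sample is done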
+ for (auto& l : layers)
+ l.update();
}
BLT_DEBUG("Total Errors found %f, %f", total_error, total_d_error);
@@ -128,7 +132,6 @@ namespace assign2
}
private:
- blt::i32 input_size, output_size, hidden_count, hidden_size;
std::vector layers;
};
}
diff --git a/src/main.cpp b/src/main.cpp
index 0d5504e..ef7cab6 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -79,37 +79,46 @@ int main(int argc, const char** argv)
auto data_files = load_data_files(get_data_files(data_directory));
random_init randomizer{619};
+ empty_init empty;
sigmoid_function sig;
relu_function relu;
threshold_function thresh;
- layer_t layer1{16, 8, &sig, randomizer, randomizer};
+ layer_t layer1{16, 16, &sig, randomizer, empty};
layer1.debug();
- layer_t layer2{8, 8, &sig, randomizer, randomizer};
+ layer_t layer2{16, 16, &sig, randomizer, empty};
layer2.debug();
- layer_t layer3{8, 8, &sig, randomizer, randomizer};
+ layer_t layer3{16, 16, &sig, randomizer, empty};
layer3.debug();
- layer_t layer_output{8, 2, &relu, randomizer, randomizer};
+ layer_t layer_output{16, 2, &sig, randomizer, empty};
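+ // two output neurons, one per class: index 0 = good, index 1 = bad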
layer_output.debug();
network_t network{{layer1, layer2, layer3, layer_output}};
- std::vector<Scalar> input;
- input.resize(16);
for (auto f : data_files)
{
if (f.data_points.begin()->bins.size() == 16)
{
- for (auto [i, b] : blt::enumerate(f.data_points.begin()->bins))
- input[i] = b;
- network.train(f);
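+ // train for a fixed 10 epochs on the first file whose samples have 16 bins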
+ for (blt::size_t i = 0; i < 10; i++)
+ {
+ network.train_epoch(f);
+ }
break;
}
}
+ BLT_INFO("Test Cases:");
- auto output = network.execute(input);
- print_vec(output) << std::endl;
-
+ for (const auto& f : data_files)
+ {
+ if (f.data_points.begin()->bins.size() == 16)
+ {
+ for (auto& d : f.data_points)
+ {
+ std::cout << "Good or bad? " << d.is_bad << " :: ";
+ print_vec(network.execute(d.bins)) << std::endl;
+ }
+ }
+ }
// for (auto d : data_files)
// {
// BLT_TRACE_STREAM << "\nSilly new file:\n";