diff --git a/CMakeLists.txt b/CMakeLists.txt
index 073ccc3..d60ed54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.25)
-project(COSC-4P80-Assignment-2 VERSION 0.0.5)
+project(COSC-4P80-Assignment-2 VERSION 0.0.6)
 
 option(ENABLE_ADDRSAN "Enable the address sanitizer" OFF)
 option(ENABLE_UBSAN "Enable the ub sanitizer" OFF)
diff --git a/commit.py b/commit.py
index fb9d5cc..8131892 100755
--- a/commit.py
+++ b/commit.py
@@ -25,7 +25,7 @@ ENVIRONMENT_DATA_LOCATION = USER_HOME / ".brett_scripts.env"
 
 if sys.platform.startswith("win"):
     CONFIG_FILE_DIRECTORY = Path(os.getenv('APPDATA') + "\\BLT")
-    CONFIG_FILE_LOCATION = Path(CONFIG_FILE_DIRECTORY + "\commit_config.env")
+    CONFIG_FILE_LOCATION = CONFIG_FILE_DIRECTORY / "commit_config.json"
 else:
     XDG_CONFIG_HOME = os.environ.get('XDG_CONFIG_HOME')
     if XDG_CONFIG_HOME is None:
@@ -36,7 +36,7 @@ else:
     if len(str(XDG_CONFIG_HOME)) == 0:
         XDG_CONFIG_HOME = USER_HOME
     CONFIG_FILE_DIRECTORY = XDG_CONFIG_HOME / "blt"
-    CONFIG_FILE_LOCATION = CONFIG_FILE_DIRECTORY / "commit_config.env"
+    CONFIG_FILE_LOCATION = CONFIG_FILE_DIRECTORY / "commit_config.json"
 
 class Config:
     def __init__(self):
diff --git a/include/assign2/layer.h b/include/assign2/layer.h
index 03ec2d4..43c84ef 100644
--- a/include/assign2/layer.h
+++ b/include/assign2/layer.h
@@ -31,11 +31,11 @@
             friend layer_t;
         public:
             // empty neuron for loading from a stream
-            explicit neuron_t(weight_view weights): weights(weights)
+            explicit neuron_t(weight_view weights, weight_view dw): dw(dw), weights(weights)
             {}
             
             // neuron with bias
-            explicit neuron_t(weight_view weights, Scalar bias): bias(bias), weights(weights)
+            explicit neuron_t(weight_view weights, weight_view dw, Scalar bias): bias(bias), dw(dw), weights(weights)
             {}
             
             Scalar activate(const Scalar* inputs, function_t* act_func)
@@ -47,6 +47,23 @@
                 return a;
             }
             
+            void back_prop(function_t* act, const std::vector<Scalar>& previous_outputs, Scalar next_error)
+            {
+                // delta for this neuron: f'(z), scaled by the error arriving from the layer above
+                error = act->derivative(z) * next_error;
+                for (auto [prev_out, d_weight] : blt::zip(previous_outputs, dw))
+                {
+                    // record the per-weight delta; applied later by update()
+                    d_weight = learn_rate * prev_out * error;
+                }
+            }
+            
+            void update()
+            {
+                for (auto [w, d] : blt::in_pairs(weights, dw))
+                    w += d;
+            }
+
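The two methods added above are the delta rule split into two phases: back_prop records the per-weight change Δw_ij = η · o_i · δ_j, and update applies it afterwards, so a backward pass never reads half-updated weights. For reference, the same step in isolation looks roughly like the sketch below; Scalar, learn_rate, and the container types are stand-ins mirroring the diff rather than the project's actual headers, and the BLT iteration helpers (blt::zip, blt::in_pairs) are replaced with plain index loops.

#include <cstddef>
#include <vector>

using Scalar = float;
constexpr Scalar learn_rate = 0.1f; // assumed global, as referenced inside neuron_t

// Minimal delta-rule neuron: accumulate dw in back_prop, apply it in update.
struct toy_neuron
{
    std::vector<Scalar> weights; // one weight per input
    std::vector<Scalar> dw;      // pending deltas, same length as weights
    Scalar error = 0;            // delta_j, already scaled by the activation derivative

    void back_prop(const std::vector<Scalar>& previous_outputs, Scalar delta)
    {
        error = delta;
        for (std::size_t i = 0; i < weights.size(); i++)
            dw[i] = learn_rate * previous_outputs[i] * error; // dw_ij = eta * o_i * delta_j
    }

    void update()
    {
        for (std::size_t i = 0; i < weights.size(); i++)
            weights[i] += dw[i];
    }
};

Keeping the deltas in their own buffer is what lets the network defer all weight movement until every layer has finished propagating error.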
             template<typename OStream>
             OStream& serialize(OStream& stream)
             {
@@ -73,11 +90,13 @@
             float a = 0;
             float bias = 0;
             float error = 0;
+            weight_view dw;
             weight_view weights;
     };
     
     class layer_t
     {
+        friend network_t;
         public:
             template<typename WeightFunc, typename BiasFunc>
             layer_t(const blt::i32 in, const blt::i32 out, function_t* act_func, WeightFunc w, BiasFunc b):
@@ -87,62 +106,64 @@
                 for (blt::i32 i = 0; i < out_size; i++)
                 {
                     auto weight = weights.allocate_view(in_size);
+                    auto dw = weight_derivatives.allocate_view(in_size);
                     for (auto& v : weight)
                         v = w(i);
-                    neurons.push_back(neuron_t{weight, b(i)});
+                    neurons.push_back(neuron_t{weight, dw, b(i)});
                 }
             }
             
-            std::vector<Scalar> call(const std::vector<Scalar>& in)
+            const std::vector<Scalar>& call(const std::vector<Scalar>& in)
             {
-                std::vector<Scalar> out;
-                out.reserve(out_size);
+                outputs.clear();
+                outputs.reserve(out_size);
 #if BLT_DEBUG_LEVEL > 0
                 if (in.size() != in_size)
                     throw std::runtime_error("Input vector doesn't match expected input size!");
 #endif
                 for (auto& n : neurons)
-                    out.push_back(n.activate(in.data(), act_func));
-                return out;
+                    outputs.push_back(n.activate(in.data(), act_func));
+                return outputs;
             }
             
-            Scalar back_prop(const std::vector<Scalar>& prev_layer_output, Scalar error, const layer_t& next_layer, bool is_output)
+            Scalar back_prop(const std::vector<Scalar>& prev_layer_output,
+                             const std::variant<blt::ref<const std::vector<Scalar>>, blt::ref<const layer_t>>& data)
             {
-                std::vector<Scalar> dw;
-                
-                // this is close! i think the changes should be applied in the neuron since the slides show the change of weight PER NEURON PER INPUT
-                // δ(h)
-                if (is_output)
-                {
-                    // assign error to output layer
-                    for (auto& n : neurons)
-                        n.error = act_func->derivative(n.z) * error; // f'act(net(h)) * (error)
-                } else
-                {
-                    // first calculate and assign input layer error
-                    std::vector<Scalar> next_error;
-                    next_error.resize(next_layer.neurons.size());
-                    for (const auto& [i, w] : blt::enumerate(next_layer.neurons))
-                    {
-                        for (auto wv : w.weights)
-                            next_error[i] += w.error * wv;
-                        // needed?
-                        next_error[i] /= static_cast<Scalar>(w.weights.size());
-                    }
-                    
-                    for (auto& n : neurons)
-                    {
-                        n.error = act_func->derivative(n.z);
-                    }
-                }
-                
-                for (const auto& v : prev_layer_output)
-                {
-                    
-                }
-                
-                return error_at_current_layer;
+                return std::visit(blt::lambda_visitor{
+                    // present when this is the output layer: the expected output of the net, one value per neuron
+                    [this, &prev_layer_output](const std::vector<Scalar>& expected) {
+                        Scalar total_error = 0;
+                        for (auto [i, n] : blt::enumerate(neurons))
+                        {
+                            auto d = outputs[i] - expected[i];
+                            auto d2 = 0.5f * (d * d);
+                            total_error += d2;
+                            n.back_prop(act_func, prev_layer_output, d2);
+                        }
+                        return total_error;
+                    },
+                    // present when this is an interior layer: the next layer, whose error is pulled back through its weights
+                    [this, &prev_layer_output](const layer_t& layer) {
+                        Scalar total_error = 0;
+                        for (auto [i, n] : blt::enumerate(neurons))
+                        {
+                            Scalar weight_error = 0;
+                            // TODO: this is not efficient on the cache!
+                            for (const auto& nn : layer.neurons)
+                                weight_error += nn.error * nn.weights[i];
+                            Scalar w2 = 0.5f * weight_error * weight_error;
+                            total_error += w2;
+                            n.back_prop(act_func, prev_layer_output, w2);
+                        }
+                        return total_error;
+                    }
+                }, data);
+            }
+            
+            void update()
+            {
+                for (auto& n : neurons)
+                    n.update();
+            }
             
             template<typename OStream>
@@ -181,8 +202,10 @@
         private:
             const blt::i32 in_size, out_size;
             weight_t weights;
+            weight_t weight_derivatives;
             function_t* act_func;
             std::vector<neuron_t> neurons;
+            std::vector<Scalar> outputs;
     };
 }
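The rewritten back_prop takes a std::variant in place of the old error/next_layer/is_output argument list: an output layer receives the expected values, an interior layer receives a reference to the layer above it, and blt::lambda_visitor dispatches on whichever alternative is present. Assuming blt::lambda_visitor is the usual C++17 "overloaded lambdas" helper, the pattern it enables reduces to the sketch below; the payload types are simplified stand-ins, and only the dispatch mechanics are shown.

#include <iostream>
#include <variant>
#include <vector>

// Presumed shape of blt::lambda_visitor: merge several lambdas into a
// single overload set that std::visit can call.
template<typename... Fs>
struct lambda_visitor : Fs... { using Fs::operator()...; };
template<typename... Fs>
lambda_visitor(Fs...) -> lambda_visitor<Fs...>;

int main()
{
    // stand-in for the real variant of <expected outputs, next layer>
    std::variant<std::vector<float>, int> data = std::vector<float>{1.0f, 0.0f};

    std::visit(lambda_visitor{
        [](const std::vector<float>& expected) { std::cout << "output layer, " << expected.size() << " targets\n"; },
        [](int next_layer_id) { std::cout << "interior layer, next = " << next_layer_id << '\n'; }
    }, data);
}

Encoding "output layer vs. interior layer" in the type system removes the is_output flag along with the dummy arguments the old signature required for whichever case did not apply.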
diff --git a/include/assign2/network.h b/include/assign2/network.h
index dd739d7..da81efb 100644
--- a/include/assign2/network.h
+++ b/include/assign2/network.h
@@ -29,8 +29,7 @@
     {
         public:
             template<typename WeightFunc, typename BiasFunc>
-            network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size, WeightFunc w, BiasFunc b):
-                    input_size(input_size), output_size(output_size), hidden_count(layer_count), hidden_size(hidden_size)
+            network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size, WeightFunc w, BiasFunc b)
             {
                 if (layer_count > 0)
                 {
@@ -50,8 +49,7 @@
             template<typename WeightFunc, typename BiasFunc, typename OutputWeightFunc, typename OutputBiasFunc>
             network_t(blt::i32 input_size, blt::i32 output_size, blt::i32 layer_count, blt::i32 hidden_size,
-                      WeightFunc w, BiasFunc b, OutputWeightFunc ow, OutputBiasFunc ob):
-                    input_size(input_size), output_size(output_size), hidden_count(layer_count), hidden_size(hidden_size)
+                      WeightFunc w, BiasFunc b, OutputWeightFunc ow, OutputBiasFunc ob)
             {
                 if (layer_count > 0)
                 {
@@ -69,28 +67,20 @@
                 }
             }
             
-            explicit network_t(std::vector<layer_t> layers):
-                    input_size(layers.begin()->get_in_size()), output_size(layers.end()->get_out_size()),
-                    hidden_count(static_cast<blt::i32>(layers.size()) - 1), hidden_size(layers.end()->get_in_size()), layers(std::move(layers))
+            explicit network_t(std::vector<layer_t> layers): layers(std::move(layers))
             {}
             
             network_t() = default;
             
-            std::vector<Scalar> execute(const std::vector<Scalar>& input)
+            const std::vector<Scalar>& execute(const std::vector<Scalar>& input)
             {
-                std::vector<Scalar> previous_output;
-                std::vector<Scalar> current_output;
+                std::vector<blt::ref<const std::vector<Scalar>>> outputs;
+                outputs.emplace_back(input);
                 
-                for (auto [i, v] : blt::enumerate(layers))
-                {
-                    previous_output = current_output;
-                    if (i == 0)
-                        current_output = v.call(input);
-                    else
-                        current_output = v.call(previous_output);
-                }
+                for (auto& v : layers)
+                    outputs.emplace_back(v.call(outputs.back()));
                 
-                return current_output;
+                return outputs.back();
             }
             
             std::pair<Scalar, Scalar> error(const std::vector<Scalar>& outputs, bool is_bad)
@@ -108,19 +98,33 @@
                 return {0.5f * (error * error), error};
             }
             
-            Scalar train(const data_file_t& example)
+            Scalar train_epoch(const data_file_t& example)
             {
                 Scalar total_error = 0;
                 Scalar total_d_error = 0;
                 for (const auto& x : example.data_points)
                 {
-                    print_vec(x.bins) << std::endl;
-                    auto o = execute(x.bins);
-                    print_vec(o) << std::endl;
-                    auto [e, de] = error(o, x.is_bad);
-                    total_error += e;
-                    total_d_error += -learn_rate * de;
-                    BLT_TRACE("\tError %f, %f, is bad? %s", e, -learn_rate * de, x.is_bad ? "True" : "False");
+                    execute(x.bins);
+                    std::vector<Scalar> expected{x.is_bad ? 0.0f : 1.0f, x.is_bad ? 1.0f : 0.0f};
+                    
+                    for (auto [i, layer] : blt::iterate(layers).enumerate().rev())
+                    {
+                        if (i == layers.size() - 1)
+                        {
+                            auto e = layer.back_prop(layers[i - 1].outputs, expected);
+                            total_error += e;
+                        } else if (i == 0)
+                        {
+                            auto e = layer.back_prop(x.bins, layers[i + 1]);
+                            total_error += e;
+                        } else
+                        {
+                            auto e = layer.back_prop(layers[i - 1].outputs, layers[i + 1]);
+                            total_error += e;
+                        }
+                    }
+                    for (auto& l : layers)
+                        l.update();
                 }
                 
                 BLT_DEBUG("Total Errors found %f, %f", total_error, total_d_error);
@@ -128,7 +132,6 @@
             }
             
         private:
-            blt::i32 input_size, output_size, hidden_count, hidden_size;
             std::vector<layer_t> layers;
     };
 }
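train_epoch now spells the backward pass out: the output layer is seeded with the one-hot targets, each earlier layer pulls error back through the layer above it, and update() runs only once the pass has finished. blt::iterate(layers).enumerate().rev() is assumed to yield (index, layer) pairs back to front; written with plain indices, the control flow reduces to this sketch, where Layer stands in for layer_t.

#include <cstddef>
#include <vector>

// Index-based restatement of the backward pass in train_epoch. Layer must
// expose outputs, back_prop(prev, expected), back_prop(prev, next_layer)
// and update(), as in the diff above.
template<typename Layer, typename Scalar>
Scalar backward_pass(std::vector<Layer>& layers,
                     const std::vector<Scalar>& inputs,
                     const std::vector<Scalar>& expected)
{
    Scalar total_error = 0;
    for (std::size_t i = layers.size(); i-- > 0;)
    {
        // the first layer reads the raw inputs, every other layer the previous layer's outputs
        const std::vector<Scalar>& prev = i == 0 ? inputs : layers[i - 1].outputs;
        if (i == layers.size() - 1)
            total_error += layers[i].back_prop(prev, expected);       // output layer vs. targets
        else
            total_error += layers[i].back_prop(prev, layers[i + 1]); // pull error from the layer above
    }
    // weights move only after every layer has read the old values
    for (auto& l : layers)
        l.update();
    return total_error;
}

Hoisting prev collapses the three-way branch in the diff to two cases, and deferring update matters because interior layers read the next layer's weights while error is still propagating.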
diff --git a/src/main.cpp b/src/main.cpp
index 0d5504e..ef7cab6 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -79,37 +79,46 @@ int main(int argc, const char** argv)
     auto data_files = load_data_files(get_data_files(data_directory));
     
     random_init randomizer{619};
+    empty_init empty;
     sigmoid_function sig;
     relu_function relu;
     threshold_function thresh;
     
-    layer_t layer1{16, 8, &sig, randomizer, randomizer};
+    layer_t layer1{16, 16, &sig, randomizer, empty};
     layer1.debug();
-    layer_t layer2{8, 8, &sig, randomizer, randomizer};
+    layer_t layer2{16, 16, &sig, randomizer, empty};
     layer2.debug();
-    layer_t layer3{8, 8, &sig, randomizer, randomizer};
+    layer_t layer3{16, 16, &sig, randomizer, empty};
     layer3.debug();
-    layer_t layer_output{8, 2, &relu, randomizer, randomizer};
+    layer_t layer_output{16, 2, &sig, randomizer, empty};
     layer_output.debug();
     
     network_t network{{layer1, layer2, layer3, layer_output}};
     
-    std::vector<Scalar> input;
-    input.resize(16);
     for (auto f : data_files)
    {
         if (f.data_points.begin()->bins.size() == 16)
         {
-            for (auto [i, b] : blt::enumerate(f.data_points.begin()->bins))
-                input[i] = b;
-            network.train(f);
+            for (blt::size_t i = 0; i < 10; i++)
+            {
+                network.train_epoch(f);
+            }
             break;
         }
     }
+    
     BLT_INFO("Test Cases:");
-    auto output = network.execute(input);
-    print_vec(output) << std::endl;
-    
+    for (auto f : data_files)
+    {
+        if (f.data_points.begin()->bins.size() == 16)
+        {
+            for (auto& d : f.data_points)
+            {
+                std::cout << "Good or bad? " << d.is_bad << " :: ";
+                print_vec(network.execute(d.bins)) << std::endl;
+            }
+        }
+    }
     
 //    for (auto d : data_files)
 //    {
 //        BLT_TRACE_STREAM << "\nSilly new file:\n";
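The test loop prints raw activations beside the label, which is hard to scan. Since train_epoch encodes its targets as {good, bad} pairs, a small helper in the same spirit could reduce that loop to a single accuracy figure; predicted_bad and accuracy below are hypothetical names sketched against the interfaces visible in this diff, not part of the project.

#include <cstddef>
#include <vector>

// Hypothetical evaluation helpers matching train_epoch's target layout:
// outputs[0] = confidence "good", outputs[1] = confidence "bad".
inline bool predicted_bad(const std::vector<float>& outputs)
{
    return outputs[1] > outputs[0];
}

template<typename DataFile, typename Network>
double accuracy(const DataFile& f, Network& network)
{
    std::size_t correct = 0;
    for (const auto& d : f.data_points)
        if (predicted_bad(network.execute(d.bins)) == d.is_bad)
            correct++;
    return static_cast<double>(correct) / static_cast<double>(f.data_points.size());
}

Printed once per file, a figure like this would make it much easier to tell across runs whether the ten training epochs actually moved the network.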