From 5e81b426d3a380b4ceb6d26171cac24ce564661b Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sat, 20 Jun 2026 19:09:47 -0500
Subject: [PATCH 1/8] added gradient finding, need to add the weight changing
 next.

---
 include/block.hpp          | 20 ++++++++---
 include/embedder.hpp       | 24 +++++++++++++
 include/layer.hpp          | 23 +++++++-----
 include/model.hpp          | 61 ++++++++++++++++++++++----------
 include/neural_network.hpp | 46 +++++++++++++-----------
 include/self_attention.hpp | 71 ++++++++++++++++++++++++++++++++------
 include/utility.hpp        | 16 +++++++++
 7 files changed, 201 insertions(+), 60 deletions(-)
diff --git a/include/block.hpp b/include/block.hpp
index 9bace6b..7c32beb 100644
--- a/include/block.hpp
+++ b/include/block.hpp
@@ -3,6 +3,8 @@
 
 #include "neural_network.hpp"
 #include "self_attention.hpp"
+#include "utility.hpp"
+#include <vector>
 
 namespace openchat {
     class block {
@@ -41,14 +43,24 @@ namespace openchat {
 
             utility::matrix feedForward(utility::matrix x) {
                 x = attention.attention(x);
-                for (int i = 0; i < x.rows; i++) {
-                    std::vector<float> output = network.feedForward(std::vector<float>(x[i], x[i] + this->n_embd));
-                    std::copy(output.begin(), output.end(), x[i]);
-                }
+                x = network.feedForward(x);
 
                 return x;
             }
 
+            std::pair<utility::matrix, std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>>> backward(utility::matrix dZ) {
+                std::pair<utility::matrix, std::vector<utility::matrix>> net_pass = network.backward(dZ);
+                
+                std::pair<utility::matrix, std::vector<utility::matrix>> attn_pass = attention.backward(net_pass.first);
+            
+                std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>> dW;
+                dW.first = net_pass.second;
+                dW.second = attn_pass.second;
+            
+                return {attn_pass.first, dW};
+            }
+            
+
             void changeOne(char mat, size_t row, size_t col, float d) {
                 this->attention.changeOne(mat, row, col, d);
             }
diff --git a/include/embedder.hpp b/include/embedder.hpp
index cd46ff7..84617b1 100644
--- a/include/embedder.hpp
+++ b/include/embedder.hpp
@@ -3,6 +3,7 @@
 
 #include <cstddef>
 #include <filesystem>
+#include <forward_list>
 #include <random>
 #include <fstream>
 #include "utility.hpp"
@@ -14,6 +15,8 @@ namespace openchat {
             size_t n_tok;
             size_t n_embd;
 
+            std::forward_list<int> toks;
+
             std::default_random_engine generator;
             std::normal_distribution<float> initDist;
 
@@ -88,10 +91,31 @@ namespace openchat {
                 return vec;
             }
 
+            utility::matrix embed(std::forward_list<int> toks) {
+                this->toks = toks;
+                utility::matrix vec(std::distance(toks.begin(), toks.end()), this->n_embd);
+                int i = 0;
+                for (int tok : toks) {
+                  std::vector<float> emb = this->embed(tok);
+                  std::copy(emb.begin(), emb.end(), vec[i]);
+                  i++;
+                }
+
+                return vec;
+            }
+
             utility::matrix * getTable() {
                 return &this->table;
             }
 
+            void backward(utility::matrix dZ, float lr) {
+                for (int i = 0; i < dZ.rows; i++) {
+                    for (int j = 0; j < dZ.cols; j++) {
+                        this->table[*std::next(this->toks.begin(), i)][j] += dZ[i][j] * lr;
+                    }
+                }
+            }
+
             embedder() {}
     };
 }
diff --git a/include/layer.hpp b/include/layer.hpp
index 68e610f..85beb4d 100644
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -11,6 +11,9 @@ namespace openchat {
         private:
             utility::matrix weights;
             utility::matrix biases;
+
+            utility::matrix X;
+
             std::default_random_engine generator;
             std::normal_distribution<float> initDist;
 
@@ -71,14 +74,18 @@ namespace openchat {
                 this->readFromFile(input);
             }
 
-            std::vector<float> feedForward(std::vector<float> input) {
-                utility::matrix i (1, input.size());
-                i.data = input;
-                std::vector<float> output = utility::add(utility::dot(i, this->weights), this->biases).data;
-                for (size_t i = 0; i < output.size(); i++) {
-                    output[i] = utility::relu(output[i]);
-                }
-                return output;
+            utility::matrix feedForward(utility::matrix x) {
+                this->X = x;
+                utility::matrix z = utility::add(utility::dot(x, this->weights), this->biases);
+                for (size_t i = 0; i < z.rows; i++) for (size_t j = 0; j < z.cols; j++) z[i][j] = utility::relu(z[i][j]);
+                return z;
+            }
+
+            std::pair<utility::matrix, utility::matrix> backward(utility::matrix dZ) {
+                utility::matrix dW = utility::dot(utility::transpose(X), dZ);
+                utility::matrix dX = utility::dot(dZ, utility::transpose(this->weights));
+
+                return {dX, dW};
             }
 
             void changeOne(float d, size_t n_in, size_t n_out) {
diff --git a/include/model.hpp b/include/model.hpp
index 7a52eac..af1fecf 100644
--- a/include/model.hpp
+++ b/include/model.hpp
@@ -1,7 +1,9 @@
 #ifndef MODEL_HPP
 #define MODEL_HPP
 
+#include <cstddef>
 #include <forward_list>
+#include <stdexcept>
 #include <vector>
 #include <filesystem>
 #include <utility>
@@ -17,6 +19,7 @@ namespace openchat {
             tokenizer tokenizer;
             embedder embedder;
             std::vector<block> blocks;
+            float learning_rate = 0.01;
             
         public:
             void init() {
@@ -33,25 +36,24 @@ namespace openchat {
                 }
             }
 
-            model(class tokenizer &tokenizer, class embedder &embedder, std::vector<class block> &blocks) {
+            model(class tokenizer &tokenizer, class embedder &embedder, std::vector<class block> &blocks, float learning_rate = 0.01) {
                 this->tokenizer = tokenizer;
                 this->embedder = embedder;
                 this->blocks = blocks;
 
+                this->learning_rate = learning_rate;
+
                 this->init();
             }
 
-            std::string forwardPass(std::string input) {
-                std::forward_list<int> tokens = tokenizer.encode(input);
+            void changeLearningRate(float n) {
+                this->learning_rate = n;
+            }
+
+            utility::matrix forwardPass(std::forward_list<int> tokens) {
                 utility::matrix unembed = utility::transpose(*embedder.getTable());
 
-                utility::matrix x = utility::matrix(std::distance(tokens.begin(), tokens.end()), embedder.getNEmbd());
-                int i = 0;
-                for (int token : tokens) {
-                  std::vector<float> emb = embedder.embed(token);
-                  std::copy(emb.begin(), emb.end(), x[i]);
-                  i++;
-                }
+                utility::matrix x = embedder.embed(tokens);
                 
                 x = positionalEncoding(x).apply();
 
@@ -64,17 +66,40 @@ namespace openchat {
 
                 dist = utility::softmax(utility::dot(dist, unembed));
 
-                float max = dist[0][0];
-                int token = 0;
-                for (size_t j = 1; j < dist.cols; j++) {
-                    if (dist[0][j] > max) {
-                        max = dist[0][j];
-                        token = static_cast<int>(j);
+                return dist;
+            }
+
+            void backward(std::string input, size_t epochs = 100) {
+                std::forward_list<int> corpus = tokenizer.encode(input);
+                auto length = std::distance(corpus.begin(), corpus.end());
+                
+                if (length <= static_cast<long long>(epochs)) throw std::invalid_argument("Give a longer input!");
+                size_t start = length - epochs; // >= 1 after the guard, so the first training prefix is never empty
+            
+                for (size_t i = 0; i < epochs; i++) {
+                    std::forward_list<int> tokens(corpus.begin(), std::next(corpus.begin(), i + start));
+                    int next = *std::next(corpus.begin(), i + start);
+            
+                    utility::matrix dist = forwardPass(tokens);
+                    utility::matrix oneHot = utility::matrix(dist.rows, 1);
+                    oneHot[next][0] = 1;
+                    float loss = -1 * std::log(dist[next][0]);
+            
+                    utility::matrix dZ = utility::subtract(dist, oneHot);
+            
+                    std::vector<std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>>> bdW;
+                    utility::matrix edW;
+            
+                    for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
+                        std::pair<utility::matrix, std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>>> p = it->backward(dZ);
+                        dZ = p.first;
+                        bdW.push_back(p.second);
                     }
+            
+                    embedder.backward(dZ, this->learning_rate);
                 }
-
-                return tokenizer.decode({token});
             }
+            
     };
 }
 
diff --git a/include/neural_network.hpp b/include/neural_network.hpp
index b677c21..d223f8d 100644
--- a/include/neural_network.hpp
+++ b/include/neural_network.hpp
@@ -6,6 +6,7 @@
 #include <limits>
 #include <vector>
 #include "layer.hpp"
+#include "utility.hpp"
 
 namespace openchat {
     class neuralNetwork {
@@ -13,13 +14,8 @@ namespace openchat {
             std::vector<layer> network;
             std::vector<size_t> dimensions;
 
-            std::vector<float> gain;
-            std::vector<float> bias;
-
         public:
             void init() {
-              this->gain = std::vector<float>(this->dimensions[(this->dimensions.size() - 1)], 1.0f);
-              this->bias = std::vector<float>(this->dimensions[(this->dimensions.size() - 1)], 0.0f);
               for (size_t i = 0; i < dimensions.size() - 1; i++) {
                 network.push_back(layer(dimensions[i], dimensions[i + 1]));
               }
@@ -30,8 +26,6 @@ namespace openchat {
 
                 if (inFile.is_open()) {
                     inFile.read(reinterpret_cast<char *>(this->dimensions.data()), sizeof(size_t) * this->dimensions.size());
-                    inFile.read(reinterpret_cast<char *>(this->gain.data()), sizeof(float) * this->dimensions[(this->dimensions.size() - 1)]);
-                    inFile.read(reinterpret_cast<char *>(this->bias.data()), sizeof(float) * this->dimensions[(this->dimensions.size() - 1)]);
                 }
 
                 for (size_t i = 0; i < input.second.size(); i++) {
@@ -44,8 +38,6 @@ namespace openchat {
 
                 if (outFile.is_open()) {
                     outFile.write(reinterpret_cast<char *>(this->dimensions.data()), sizeof(size_t) * this->dimensions.size());
-                    outFile.write(reinterpret_cast<char *>(this->gain.data()), sizeof(float) * this->dimensions[(this->dimensions.size() - 1)]);
-                    outFile.write(reinterpret_cast<char *>(this->bias.data()), sizeof(float) * this->dimensions[(this->dimensions.size() - 1)]);
                 }
 
                 for (size_t i = 0; i < output.second.size(); i++) {
@@ -63,7 +55,9 @@ namespace openchat {
                 this->readFromFile(input);
             }
 
-            void layerNorm(std::vector<float> &input) {
+            void layerNorm(std::vector<float> &x, size_t start, size_t end) {
+                std::vector<float> input(x[start], x[end]);
+
                 if (input.empty()) return;
 
                 float sum = 0;
@@ -75,17 +69,34 @@ namespace openchat {
                 float variance = sum / input.size();
 
                 for (int i = 0; i < input.size(); i++) {
-                    input[i] = ((input[i] - mean)/std::sqrt(variance + std::numeric_limits<float>::epsilon())) * this->gain[i] + this->bias[i];
+                    input[i] = ((input[i] - mean)/std::sqrt(variance + std::numeric_limits<float>::epsilon()));
+                }
+
+                for (int i = 0; i < input.size(); i++) {
+                    x[start+i] = input[i];
                 }
             }
 
-            std::vector<float> feedForward(std::vector<float> input) {
+            utility::matrix feedForward(utility::matrix x) {
                 for (layer &l : network) {
-                    input = l.feedForward(input);
+                    x = l.feedForward(x);
                 }
 
-                this->layerNorm(input);
-                return input;
+                for (int i = 0; i < x.rows; i++) {
+                    this->layerNorm(x.data, i*x.cols, i*x.cols+x.cols);
+                }
+                return x;
+            }
+
+            std::pair<utility::matrix, std::vector<utility::matrix>> backward(utility::matrix dZ) {
+                std::pair<utility::matrix, std::vector<utility::matrix>> ndW;
+                for (auto it = this->network.rbegin(); it != this->network.rend(); ++it) {
+                    std::pair<utility::matrix, utility::matrix> p = it->backward(dZ);
+                    dZ = p.first;
+                    ndW.second.push_back(p.second); 
+                }
+                ndW.first = dZ;
+                return ndW;
             }
 
             void changeOne(size_t layer, float d, size_t n_in, size_t n_out) {
@@ -95,11 +106,6 @@ namespace openchat {
             void changeOne(size_t layer, float d, size_t n_in) {
                 this->network[layer].changeOne(d, n_in);
             }
-            
-            void changeOne(int x, float d, int pos) {
-                if (x == 0) gain[pos] -= d;
-                if (x == 1) bias[pos] -= d;
-            }
 
             neuralNetwork() {}
     };
diff --git a/include/self_attention.hpp b/include/self_attention.hpp
index 9a8f0e8..1eb8db1 100644
--- a/include/self_attention.hpp
+++ b/include/self_attention.hpp
@@ -5,6 +5,9 @@
 #include <filesystem>
 #include <fstream>
 #include <random>
+#include <cmath>
+#include <vector>
+#include <utility>
 
 namespace openchat {
     class selfAttention {
@@ -16,6 +19,8 @@ namespace openchat {
             utility::matrix q;
             utility::matrix k;
             utility::matrix v;
+            utility::matrix p;
+            utility::matrix x;
 
             size_t n_embd;
 
@@ -24,7 +29,8 @@ namespace openchat {
 
           public:
             void init() {
-                initDist = std::normal_distribution<float>(0, this->n_embd);
+                float stddev = 1.0f / std::sqrt(static_cast<float>(this->n_embd));
+                initDist = std::normal_distribution<float>(0.0f, stddev);
 
                 for (size_t i = 0; i < this->n_embd; i++) {
                     for (size_t j = 0; j < this->n_embd; j++) {
@@ -78,23 +84,68 @@ namespace openchat {
             }
 
             utility::matrix attention(utility::matrix x) {
+                this->x = x;
                 this->q = utility::dot(x, this->wq);
                 this->k = utility::dot(x, this->wk);
                 this->v = utility::dot(x, this->wv);
-                
-                return utility::dot(
-                    utility::softmax(
-                        utility::scalar_div(
-                            utility::dot(this->q, 
-                            utility::transpose(this->k)), 
-                        std::sqrt(n_embd))), 
-                    this->v); 
+
+                this->p = utility::softmax(utility::scalar_div(
+                    utility::dot(this->q, utility::transpose(this->k)),
+                    std::sqrt(static_cast<float>(n_embd))));
+
+                return utility::dot(this->p, this->v);
             }
 
             size_t getNEmbed() {
                 return this->n_embd;
             }
 
+            std::pair<utility::matrix, std::vector<utility::matrix>> backward(utility::matrix dZ) {
+                utility::matrix dV = utility::dot(utility::transpose(this->p), dZ);
+                utility::matrix dP = utility::dot(dZ, utility::transpose(this->v));
+                
+                int M = dP.rows; 
+                utility::matrix dS(M, M);
+                
+                for (int i = 0; i < M; ++i) {
+                    float sum_dP_P = 0.0f;
+
+                    for (int k = 0; k < M; ++k) {
+                        sum_dP_P += dP.data[i * M + k] * this->p.data[i * M + k];
+                    }
+                    
+                    for (int j = 0; j < M; ++j) {
+                        int idx = i * M + j;
+                        dS.data[idx] = this->p.data[idx] * (dP.data[idx] - sum_dP_P);
+                    }
+                }
+                
+                int K = this->q.cols; 
+                float scale = 1.0f / std::sqrt(static_cast<float>(K));
+                for (int i = 0; i < M * M; ++i) {
+                    dS.data[i] *= scale;
+                }
+                
+                utility::matrix dQ = utility::dot(dS, this->k);
+                utility::matrix dK = utility::dot(utility::transpose(dS), this->q);
+                
+                utility::matrix dWq = utility::dot(utility::transpose(this->x), dQ);
+                utility::matrix dWk = utility::dot(utility::transpose(this->x), dK);
+                utility::matrix dWv = utility::dot(utility::transpose(this->x), dV);
+                
+                utility::matrix dX_q = utility::dot(dQ, utility::transpose(this->wq));
+                utility::matrix dX_k = utility::dot(dK, utility::transpose(this->wk));
+                utility::matrix dX_v = utility::dot(dV, utility::transpose(this->wv));
+                
+                utility::matrix dX(dX_q.rows, dX_q.cols);
+                for (int i = 0; i < dX.rows * dX.cols; ++i) {
+                    dX.data[i] = dX_q.data[i] + dX_k.data[i] + dX_v.data[i];
+                }
+                
+                std::vector<utility::matrix> weight_gradients = {dWq, dWk, dWv};
+                return {dX, weight_gradients};
+            }
+
             void changeOne(char mat, size_t row, size_t col, float d) {
                 if (mat == 'q') wq[row][col] -= d;
                 else if (mat == 'k') wk[row][col] -= d;
@@ -105,4 +156,4 @@ namespace openchat {
     };
 }
 
-#endif
\ No newline at end of file
+#endif
diff --git a/include/utility.hpp b/include/utility.hpp
index 686f2aa..d829931 100644
--- a/include/utility.hpp
+++ b/include/utility.hpp
@@ -49,6 +49,22 @@ namespace openchat {
 
             return c;
         }
+
+        inline matrix subtract(const matrix& a, const matrix& b) {
+            if (a.cols != b.cols)
+                throw std::invalid_argument("Columns don't match!");
+            if (a.rows != b.rows)
+                throw std::invalid_argument("Rows don't match!");
+            
+            matrix c = matrix(a.rows, b.cols);
+            for (size_t i = 0; i < a.rows; i++ ) {
+                for (size_t j = 0; j < a.cols; j++) {
+                    c[i][j] = a[i][j] - b[i][j];
+                }
+            }
+
+            return c;
+        }
         
         inline matrix dot(const matrix& a, const matrix& b) {
             if (a.cols != b.rows) throw std::invalid_argument("Inner dimensions don't match!");

From dcc57bc7e841eb0f411cef585bb234c3b33d3d6f Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sat, 20 Jun 2026 19:51:21 -0500
Subject: [PATCH 2/8] finished backprop

---
 include/block.hpp          |  6 +++---
 include/layer.hpp          | 20 ++++++++++++++++----
 include/model.hpp          | 35 ++++++++++++++++++++++++++++++++---
 include/neural_network.hpp |  6 +++---
 4 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/include/block.hpp b/include/block.hpp
index 7c32beb..5c437fc 100644
--- a/include/block.hpp
+++ b/include/block.hpp
@@ -48,12 +48,12 @@ namespace openchat {
                 return x;
             }
 
-            std::pair<utility::matrix, std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>>> backward(utility::matrix dZ) {
-                std::pair<utility::matrix, std::vector<utility::matrix>> net_pass = network.backward(dZ);
+            std::pair<utility::matrix, std::pair<std::vector<std::pair<utility::matrix, utility::matrix>>, std::vector<utility::matrix>>> backward(utility::matrix dZ) {
+                std::pair<utility::matrix, std::vector<std::pair<utility::matrix, utility::matrix>>> net_pass = network.backward(dZ);
                 
                 std::pair<utility::matrix, std::vector<utility::matrix>> attn_pass = attention.backward(net_pass.first);
             
-                std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>> dW;
+                std::pair<std::vector<std::pair<utility::matrix, utility::matrix>>, std::vector<utility::matrix>> dW;
                 dW.first = net_pass.second;
                 dW.second = attn_pass.second;
             
diff --git a/include/layer.hpp b/include/layer.hpp
index 85beb4d..044e179 100644
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -81,11 +81,23 @@ namespace openchat {
                 return z;
             }
 
-            std::pair<utility::matrix, utility::matrix> backward(utility::matrix dZ) {
-                utility::matrix dW = utility::dot(utility::transpose(X), dZ);
-                utility::matrix dX = utility::dot(dZ, utility::transpose(this->weights));
+            std::pair<utility::matrix, std::pair<utility::matrix, utility::matrix>> backward(utility::matrix dZ) {
+                int M = dZ.rows;
+                int N = dZ.cols;
 
-                return {dX, dW};
+                utility::matrix dW = utility::dot(utility::transpose(this->X), dZ);
+                utility::matrix dX = utility::dot(dZ, utility::transpose(this->weights));
+        
+                utility::matrix db(1, N); 
+                std::fill(db.data.begin(), db.data.end(), 0.0f);
+        
+                for (int i = 0; i < M; ++i) {
+                    for (int j = 0; j < N; ++j) {
+                        db.data[j] += dZ.data[i * N + j];
+                    }
+                }
+        
+                return {dX, {dW, db}};
             }
 
             void changeOne(float d, size_t n_in, size_t n_out) {
diff --git a/include/model.hpp b/include/model.hpp
index af1fecf..281ca31 100644
--- a/include/model.hpp
+++ b/include/model.hpp
@@ -69,7 +69,7 @@ namespace openchat {
                 return dist;
             }
 
-            void backward(std::string input, size_t epochs = 100) {
+            void train(std::string input, size_t epochs = 100) {
                 std::forward_list<int> corpus = tokenizer.encode(input);
                 auto length = std::distance(corpus.begin(), corpus.end());
                 
@@ -87,16 +87,45 @@ namespace openchat {
             
                     utility::matrix dZ = utility::subtract(dist, oneHot);
             
-                    std::vector<std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>>> bdW;
+                    std::vector<std::pair<std::vector<std::pair<utility::matrix, utility::matrix>>, std::vector<utility::matrix>>> bdW;
                     utility::matrix edW;
             
                     for (auto it = blocks.rbegin(); it != blocks.rend(); ++it) {
-                        std::pair<utility::matrix, std::pair<std::vector<utility::matrix>, std::vector<utility::matrix>>> p = it->backward(dZ);
+                        std::pair<utility::matrix, std::pair<std::vector<std::pair<utility::matrix, utility::matrix>>, std::vector<utility::matrix>>> p = it->backward(dZ);
                         dZ = p.first;
                         bdW.push_back(p.second);
                     }
             
                     embedder.backward(dZ, this->learning_rate);
+
+                    int j = 0;
+                    for (block& b : this->blocks) {
+                        std::vector<std::pair<utility::matrix, utility::matrix>> ndW = bdW[j].first;
+                        std::vector<utility::matrix> adW = bdW[j].second;
+
+                        for (size_t layer = ndW.size() - 1; layer >= 0; layer--) {
+                            for (int k = 0; k < ndW[layer].first.cols; k++) 
+                                b.changeOne(layer, ndW[layer].first[0][k] * this->learning_rate, k);
+
+                            for (int k = 0; k < ndW[layer].second.rows; k ++)
+                                for (int l = 0; l < ndW[layer].second.rows; l++)
+                                    b.changeOne(layer,  ndW[layer].second[k][l] * this->learning_rate, k, l);
+                        }
+
+                        for (int k = 0; k < adW[0].rows; k++)
+                            for (int l = 0; l < adW[0].rows; l++)
+                                b.changeOne('q', k, l, adW[0][k][l] * this->learning_rate);
+
+                        for (int k = 0; k < adW[1].rows; k++)
+                            for (int l = 0; l < adW[1].rows; l++)
+                                b.changeOne('k', k, l, adW[1][k][l] * this->learning_rate);
+
+                        for (int k = 0; k < adW[2].rows; k++)
+                            for (int l = 0; l < adW[2].rows; l++)
+                                b.changeOne('v', k, l, adW[2][k][l] * this->learning_rate);
+
+                        j++;
+                    }
                 }
             }
             
diff --git a/include/neural_network.hpp b/include/neural_network.hpp
index d223f8d..71234ed 100644
--- a/include/neural_network.hpp
+++ b/include/neural_network.hpp
@@ -88,10 +88,10 @@ namespace openchat {
                 return x;
             }
 
-            std::pair<utility::matrix, std::vector<utility::matrix>> backward(utility::matrix dZ) {
-                std::pair<utility::matrix, std::vector<utility::matrix>> ndW;
+            std::pair<utility::matrix, std::vector<std::pair<utility::matrix, utility::matrix>>> backward(utility::matrix dZ) {
+                std::pair<utility::matrix, std::vector<std::pair<utility::matrix, utility::matrix>>> ndW;
                 for (auto it = this->network.rbegin(); it != this->network.rend(); ++it) {
-                    std::pair<utility::matrix, utility::matrix> p = it->backward(dZ);
+                    std::pair<utility::matrix, std::pair<utility::matrix, utility::matrix>> p = it->backward(dZ);
                     dZ = p.first;
                     ndW.second.push_back(p.second); 
                 }

From d7c055983890596245ee70224ce31ac4cabbb175 Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sat, 20 Jun 2026 20:03:36 -0500
Subject: [PATCH 3/8] Update include/embedder.hpp

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 include/embedder.hpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/embedder.hpp b/include/embedder.hpp
index 84617b1..1bf839f 100644
--- a/include/embedder.hpp
+++ b/include/embedder.hpp
@@ -109,10 +109,13 @@ namespace openchat {
             }
 
             void backward(utility::matrix dZ, float lr) {
+                auto tok_it = this->toks.begin();
                 for (int i = 0; i < dZ.rows; i++) {
+                    int tok = *tok_it;
                     for (int j = 0; j < dZ.cols; j++) {
-                        this->table[*std::next(this->toks.begin(), i)][j] += dZ[i][j] * lr;
+                        this->table[tok][j] -= dZ[i][j] * lr;
                     }
+                    ++tok_it; // step to the next cached token so each dZ row updates its own table row
                 }
             }
 

From d7efe3d14c9b7350dc94782235e122da3388a70a Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sat, 20 Jun 2026 20:04:23 -0500
Subject: [PATCH 4/8] Update include/layer.hpp

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 include/layer.hpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/layer.hpp b/include/layer.hpp
index 044e179..d0f15e9 100644
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -76,7 +76,12 @@ namespace openchat {
 
             utility::matrix feedForward(utility::matrix x) {
                 this->X = x;
-                utility::matrix z = utility::add(utility::dot(x, this->weights), this->biases);
+                utility::matrix z = utility::dot(x, this->weights);
+                for (size_t i = 0; i < z.rows; i++) {
+                    for (size_t j = 0; j < z.cols; j++) {
+                        z[i][j] += this->biases[0][j];
+                    }
+                }
                 for (size_t i = 0; i < z.rows; i++) for (size_t j = 0; j < z.cols; j++) z[i][j] = utility::relu(z[i][j]);
                 return z;
             }

From e0de46863de68d50cfc7a334d24d7dcb5bd6c934 Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sat, 20 Jun 2026 21:57:09 -0500
Subject: [PATCH 5/8] Update include/neural_network.hpp

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 include/neural_network.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/neural_network.hpp b/include/neural_network.hpp
index 71234ed..75a222b 100644
--- a/include/neural_network.hpp
+++ b/include/neural_network.hpp
@@ -56,7 +56,7 @@ namespace openchat {
             }
 
             void layerNorm(std::vector<float> &x, size_t start, size_t end) {
-                std::vector<float> input(x[start], x[end]);
+                std::vector<float> input(x.begin() + start, x.begin() + end);
 
                 if (input.empty()) return;
 

From f770a6d4bc08c9690c4aa29b4c26c679b9eb6c07 Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sun, 21 Jun 2026 10:33:14 -0500
Subject: [PATCH 6/8] bug fixes

---
 AIUsage.md           |  1 +
 include/embedder.hpp |  6 +++---
 include/layer.hpp    |  2 ++
 include/model.hpp    | 25 +++++++++++++------------
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/AIUsage.md b/AIUsage.md
index 918539f..4932e78 100644
--- a/AIUsage.md
+++ b/AIUsage.md
@@ -3,6 +3,7 @@
 ## Source Code
 
  - Notebook LM was used to understand technical articles relevant to the creation of code
+ - CodeRabbit AI was used for PR review
 
 ### Tokenizer
 
diff --git a/include/embedder.hpp b/include/embedder.hpp
index 84617b1..2901bcf 100644
--- a/include/embedder.hpp
+++ b/include/embedder.hpp
@@ -96,9 +96,9 @@ namespace openchat {
                 utility::matrix vec(std::distance(toks.begin(), toks.end()), this->n_embd);
                 int i = 0;
                 for (int tok : toks) {
-                  std::vector<float> emb = this->embed(tok);
-                  std::copy(emb.begin(), emb.end(), vec[i]);
-                  i++;
+                    std::vector<float> emb = this->embed(tok);
+                    std::copy(emb.begin(), emb.end(), vec[i]);
+                    i++;
                 }
 
                 return vec;
diff --git a/include/layer.hpp b/include/layer.hpp
index 044e179..d80bf9d 100644
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -13,6 +13,7 @@ namespace openchat {
             utility::matrix biases;
 
             utility::matrix X;
+            utility::matrix Z;
 
             std::default_random_engine generator;
             std::normal_distribution<float> initDist;
@@ -77,6 +78,7 @@ namespace openchat {
             utility::matrix feedForward(utility::matrix x) {
                 this->X = x;
                 utility::matrix z = utility::add(utility::dot(x, this->weights), this->biases);
+                this->Z = z;
                 for (size_t i = 0; i < z.rows; i++) for (size_t j = 0; j < z.cols; j++) z[i][j] = utility::relu(z[i][j]);
                 return z;
             }
diff --git a/include/model.hpp b/include/model.hpp
index 281ca31..062e518 100644
--- a/include/model.hpp
+++ b/include/model.hpp
@@ -81,9 +81,9 @@ namespace openchat {
                     int next = *std::next(corpus.begin(), i + start);
             
                     utility::matrix dist = forwardPass(tokens);
-                    utility::matrix oneHot = utility::matrix(dist.rows, 1);
-                    oneHot[next][0] = 1;
-                    float loss = -1 * std::log(dist[next][0]);
+                    utility::matrix oneHot = utility::matrix(dist.rows, dist.cols);
+                    oneHot[0][next] = 1;
+                    float loss = -1 * std::log(dist[0][next]);
             
                     utility::matrix dZ = utility::subtract(dist, oneHot);
             
@@ -99,29 +99,30 @@ namespace openchat {
                     embedder.backward(dZ, this->learning_rate);
 
                     int j = 0;
-                    for (block& b : this->blocks) {
+                    for (auto it = this->blocks.rbegin(); it != this->blocks.rend(); ++it) {
+                        block &b = *it;
                         std::vector<std::pair<utility::matrix, utility::matrix>> ndW = bdW[j].first;
                         std::vector<utility::matrix> adW = bdW[j].second;
 
-                        for (size_t layer = ndW.size() - 1; layer >= 0; layer--) {
-                            for (int k = 0; k < ndW[layer].first.cols; k++) 
+                        for (size_t layer = 0; layer < ndW.size(); layer++) {
+                            for (int k = 0; k < ndW[layer].first.cols; k++)
                                 b.changeOne(layer, ndW[layer].first[0][k] * this->learning_rate, k);
 
-                            for (int k = 0; k < ndW[layer].second.rows; k ++)
-                                for (int l = 0; l < ndW[layer].second.rows; l++)
-                                    b.changeOne(layer,  ndW[layer].second[k][l] * this->learning_rate, k, l);
+                            for (int k = 0; k < ndW[layer].second.rows; k++)
+                                for (int l = 0; l < ndW[layer].second.cols; l++)
+                                    b.changeOne(layer, ndW[layer].second[k][l] * this->learning_rate, k, l);
                         }
 
                         for (int k = 0; k < adW[0].rows; k++)
-                            for (int l = 0; l < adW[0].rows; l++)
+                            for (int l = 0; l < adW[0].cols; l++)
                                 b.changeOne('q', k, l, adW[0][k][l] * this->learning_rate);
 
                         for (int k = 0; k < adW[1].rows; k++)
-                            for (int l = 0; l < adW[1].rows; l++)
+                            for (int l = 0; l < adW[1].cols; l++)
                                 b.changeOne('k', k, l, adW[1][k][l] * this->learning_rate);
 
                         for (int k = 0; k < adW[2].rows; k++)
-                            for (int l = 0; l < adW[2].rows; l++)
+                            for (int l = 0; l < adW[2].cols; l++)
                                 b.changeOne('v', k, l, adW[2][k][l] * this->learning_rate);
 
                         j++;

From 5fd98cfa09b2caf523a16d3f10f460ded8334c03 Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sun, 21 Jun 2026 10:34:48 -0500
Subject: [PATCH 7/8] bug fixes

---
 .DS_Store                           | Bin 0 -> 10244 bytes
 .github/workflows/documentation.yml |  34 ++++++++++++++++++++++++++++
 .github/workflows/macos.yml         |  27 ++++++++++++++++++++++
 .github/workflows/pre-commit.yml    |  15 ++++++++++++
 .github/workflows/ubuntu.yml        |  31 +++++++++++++++++++++++++
 .github/workflows/windows.yml       |  28 +++++++++++++++++++++++
 include/embedder.hpp                |   2 +-
 7 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 .DS_Store
 create mode 100644 .github/workflows/documentation.yml
 create mode 100644 .github/workflows/macos.yml
 create mode 100644 .github/workflows/pre-commit.yml
 create mode 100644 .github/workflows/ubuntu.yml
 create mode 100644 .github/workflows/windows.yml

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..601adcf7f660eeac9f96882c51c4b5b8d297e08b
GIT binary patch
literal 10244
zcmeHMYiu3G6+UO};N2`CYh#lTa&s@fKuqie<2>4sl3c$;Ozb$xwc|$~m%Vo<_A2XL
zv%B{i6Qwf0cm$B>k5;8X|5b$_yxJhz{y{1Tkboa3M1X_<A@K`|XQ`@6EoWx-+V@@u
z`4<JLJJQUVIdjfD&YW*{W)=Xjr)b>_kOBaeBB#dXlw2b4dLEYrCE^%Fk@Nvfm;wgd
zU<RyZ(lH~pKx~270<i^R3&a-qDOiBM**vJaq;VQsAhtkkfkg}O{UJq>(@aDcBo$i+
zCE^M|w4T@^P@Z20xK1jfnTRe(N;MQvsuGo|7+o<?suMpI$z>wCAgNRbj4mG-^^DOC
z1x4@FeoBY~W+aW%*aEQymRo?sWi@oe0E|LVd4C^pyh7gb@>I`ddMIxDuLzU_SOs|~
zf(;H>rL6Bdo;yWc4$gYI`+HD;pr(cAsjjkx1{kDXd!Zi=LKga~rLna}T7x4b*$sU#
zL{z*m)M)&wMq@36;M0t3a7CPg9F58&-Ykv0c65AxPgKlr1?~J?FdJFt#r&*e2VrOD
zMO5nQS6$Z7pf%_XC$iJtTsAbrV$fsy$9QSP^PGG+Z|0BMY%XUs_j+z<+Ai~jf^9Jp
zIX-5)mN(bq6<sS3x>;GFX{wep)}J}kwR>krYWwb8XFF17c6W7lq;_oIb@r^Lwxn;`
zb0l}#nQ^@bXhsw;+Av{_EH9?#D7V5at)SrKY1F)l6-4bqDKZGYSvJUrh7p-W)mH15
zueoBa(b#llV{>CmOUqU3)?dBhnhiPQ%Dh=PJ>~PM_Ii%z_c%p%Y|6K-?5tU^-Kp`A
z-4$l-yIIa?su4ZHtdmQuk@;DYX<lN8=5F&Db47c@r%nYd9Iwbv<czgJ!S@{JkQdmY
z?YR@j16rtokj)IZr#$MhM2j>$%d3K;%nyhz(;1&IecyDaSkL?k$ILTl*zNJc(3=^z
z3!W=SzE(6##X~;hO{VvKA=e?DIZ)2D$&3MMnW2Mub2%4LOutlkN#sXd&t+OVb4b=`
z^!02fbPT~!I1P8hBk(vp34epv-~;#^{)G)_U@Nxc4!jlj;eH&zNi5(r+UUy$l(Ls3
zWVxJTK)Dr`-U&**5({<N^3uPL5(|9c^)-pH@rlV}$8&~J4A>Fon4w)_eP+l4ZcT3S
zCDV)*Jf}G0QcYvQbP7chX5Eg%?I@(H*Q>%iYL;?*GVuJ6e1ICOScyxlsHQ!A#k%Bm
zZCks3_KQsaz|i3f+)s7A<*Kog9oTt?O_)J}xfXfj=$vhZ)02coH?&XL%-6I=>=<#)
z8FrzGd-TRcLdq=Zlv^1u=IuL+b~taORaN$pXiaOXasIePlN|Ca-l8RH5{cH$s(Os`
zdwoW!Wb29c4pJ~j0y0N5%+8xN%?L(L)0QDs^_{y^&9Fv=Ge)di_7Lm9SV{VD#BAB8
zYU2fOW`-scNM541PgRZAPV$Zi%^)0}W$xmTdiSg9WH4>|Y<P|~tg^<Ss)}VKK3D0t
z?9lUtH4e5W5)-_sZp&g8O?t!ntD6$p#Heb#O*)3{mTcN02~~2ZR2N-8o!KH?u)^cX
z#_3F_WG_Po0U!aL^qIOHPSOTH59i?5@O$_(oQD_S6?h9S!bk8ad<kE{*YGX;8|!g3
zHseOT9yenf-iTeeo#3$xdvOql@Sp&Qg$$?A!6KG$4(IWgcrV_E_v1Ny1RurU;bZs&
z{sEuCXYo1wEB+l{##iuFyoev-NBA**hX25C@t=yOG$|XEgtAH5th6axly+sWvQOEs
z99AZkoH8vZv<ScxvRqD;dCIK}z-aGPp1w2yleg_3xc$K3PZogh$xlNweHK#JZ@Fp5
z&u{HsbWG{TW!J5Qgg`+Rb_5Cp8f!gRU{l|e93_~ryOzKy!CHo>(ZJ!XUf|N#>glAh
zihv}GH|T9i<1zw~EVd`NCJlwKD~p}UF5RdjWNJcpXL74<pehTy^*y>#PZ*WOz4|8I
z08JKpl6^^oBToIHeE)TL8{Q?~|BQV9KUjw<uEP}GfSu&)dohFE*oXb(?}zX(W^n{h
zU=HuVJbfM<bkV~*(Z@4*mVAE!@4*M~L3{`w#^2y?2?3Ac<K+K;#3%77{1cwX7w|>=
zi$KB~_zu30pVavFP}!>`#cx*j?V7{ox=S4DEIQ<c6m#O|Q%HG6hrDfk7pOiW0zZOQ
z9`fp;M6xcFs^!Gtvr?`B=14A22V|SKUM;QgPiXbQwo1D45GCgwr?CZM3&a-qu`N(1
z2aw|L|NY<p|NqAxX54>lf!G2+parnLFWbi-z{-yRwS6hXNKw3lA`e_{K~kYY2^Zmf
u@f=SD{v0n}G>8}7*+hLNl3S2esFR*hC;C778Bq126W{;i`~Uyn{r@|(kixtG

literal 0
HcmV?d00001

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
new file mode 100644
index 0000000..d741a19
--- /dev/null
+++ b/.github/workflows/documentation.yml
@@ -0,0 +1,34 @@
+name: Documentation
+
+on:
+  push:
+    tags:
+      - "*"
+    branches: [ main, master ]
+
+jobs:
+  build:
+    name: Build and publish documentation
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v2
+    - name: Install Docs
+      run: |
+        sudo apt-get install doxygen
+        pip install jinja2 Pygments
+    - name: prepare
+      run: |
+        make prepare
+    - name: configure
+      run: |
+        cmake -H. -Bbuild -G "Unix Makefiles" -DCMAKE_BUILD_TYPE="Debug"
+    - name: building
+      run: |
+        cmake --build build --config Debug --target docs -j4
+    - name: Deploy to GitHub Pages
+      uses: Cecilapp/GitHub-Pages-deploy@v3
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      with:
+        build_dir: ./docs/html
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
new file mode 100644
index 0000000..40ce4ca
--- /dev/null
+++ b/.github/workflows/macos.yml
@@ -0,0 +1,27 @@
+name: MacOS CI Test
+
+on:
+  push:
+    branches: [ main, master, dev ]
+  pull_request:
+    branches: [ main, master, dev ]
+
+jobs:
+  build:
+    runs-on: macos-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: prepare
+      run: |
+        make prepare
+    - name: configure
+      run: |
+        cmake -H. -Bbuild -G "Unix Makefiles" -DCMAKE_BUILD_TYPE="Debug"
+    - name: building
+      run: |
+        cmake --build build --config Debug --target unit_tests -j4
+    - name: testing
+      run: |
+        cd build
+        cd tests
+        ./unit_tests
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 0000000..5b607b2
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,15 @@
+name: pre-commit
+
+on:
+  push:
+    branches: [ main, master, dev ]
+  pull_request:
+    branches: [ main, master, dev ]
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - uses: actions/setup-python@v2
+    - uses: pre-commit/action@v2.0.0
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
new file mode 100644
index 0000000..e9ca5ac
--- /dev/null
+++ b/.github/workflows/ubuntu.yml
@@ -0,0 +1,31 @@
+name: Ubuntu CI Test
+
+on:
+  push:
+    branches: [ main, master, dev ]
+  pull_request:
+    branches: [ main, master, dev ]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: install
+      run: |
+        sudo apt-get install gcovr lcov
+    - name: prepare
+      run: |
+        make prepare
+    - name: configure
+      run: |
+        cmake -H. -Bbuild -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DENABLE_COVERAGE=On
+    - name: building
+      run: |
+        cmake --build build --config Debug --target coverage -j4
+    - name: testing
+      run: |
+        cd build
+        cd tests
+        ./unit_tests
+        bash <(curl -s https://codecov.io/bash)
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
new file mode 100644
index 0000000..7be18e6
--- /dev/null
+++ b/.github/workflows/windows.yml
@@ -0,0 +1,28 @@
+name: Windows CI Test
+
+on:
+  push:
+    branches: [ main, master, dev ]
+  pull_request:
+    branches: [ main, master, dev ]
+
+jobs:
+  build:
+    runs-on: windows-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: prepare
+      run: |
+        make prepare
+    - name: configure
+      run: |
+        cmake -H"." -Bbuild -T host=x86 -A x64 -DCMAKE_BUILD_TYPE="Debug"
+    - name: building
+      run: |
+        cmake --build build --config Debug --target unit_tests -j4
+    - name: testing
+      run: |
+        cd build
+        cd tests
+        cd Debug
+        .\unit_tests.exe
diff --git a/include/embedder.hpp b/include/embedder.hpp
index d4d6c8c..60dc182 100644
--- a/include/embedder.hpp
+++ b/include/embedder.hpp
@@ -115,7 +115,7 @@ namespace openchat {
                     for (int j = 0; j < dZ.cols; j++) {
                         this->table[tok][j] -= dZ[i][j] * lr;
                     }
-                    +tok_it;
+                    ++tok_it;
                 }
             }
 

From 5654136a2bbab7b4cf235238fc2220d5743e6379 Mon Sep 17 00:00:00 2001
From: Ben Kiev <146984941+Bean91@users.noreply.github.com>
Date: Sun, 21 Jun 2026 10:38:08 -0500
Subject: [PATCH 8/8] removed workflows - unneeded

---
 .github/workflows/documentation.yml | 34 -----------------------------
 .github/workflows/macos.yml         | 27 -----------------------
 .github/workflows/pre-commit.yml    | 15 -------------
 .github/workflows/ubuntu.yml        | 31 --------------------------
 .github/workflows/windows.yml       | 28 ------------------------
 5 files changed, 135 deletions(-)
 delete mode 100644 .github/workflows/documentation.yml
 delete mode 100644 .github/workflows/macos.yml
 delete mode 100644 .github/workflows/pre-commit.yml
 delete mode 100644 .github/workflows/ubuntu.yml
 delete mode 100644 .github/workflows/windows.yml

diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
deleted file mode 100644
index d741a19..0000000
--- a/.github/workflows/documentation.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Documentation
-
-on:
-  push:
-    tags:
-      - "*"
-    branches: [ main, master ]
-
-jobs:
-  build:
-    name: Build and publish documentation
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-python@v2
-    - name: Install Docs
-      run: |
-        sudo apt-get install doxygen
-        pip install jinja2 Pygments
-    - name: prepare
-      run: |
-        make prepare
-    - name: configure
-      run: |
-        cmake -H. -Bbuild -G "Unix Makefiles" -DCMAKE_BUILD_TYPE="Debug"
-    - name: building
-      run: |
-        cmake --build build --config Debug --target docs -j4
-    - name: Deploy to GitHub Pages
-      uses: Cecilapp/GitHub-Pages-deploy@v3
-      env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      with:
-        build_dir: ./docs/html
diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml
deleted file mode 100644
index 40ce4ca..0000000
--- a/.github/workflows/macos.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-name: MacOS CI Test
-
-on:
-  push:
-    branches: [ main, master, dev ]
-  pull_request:
-    branches: [ main, master, dev ]
-
-jobs:
-  build:
-    runs-on: macos-latest
-    steps:
-    - uses: actions/checkout@v2
-    - name: prepare
-      run: |
-        make prepare
-    - name: configure
-      run: |
-        cmake -H. -Bbuild -G "Unix Makefiles" -DCMAKE_BUILD_TYPE="Debug"
-    - name: building
-      run: |
-        cmake --build build --config Debug --target unit_tests -j4
-    - name: testing
-      run: |
-        cd build
-        cd tests
-        ./unit_tests
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
deleted file mode 100644
index 5b607b2..0000000
--- a/.github/workflows/pre-commit.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: pre-commit
-
-on:
-  push:
-    branches: [ main, master, dev ]
-  pull_request:
-    branches: [ main, master, dev ]
-
-jobs:
-  pre-commit:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-python@v2
-    - uses: pre-commit/action@v2.0.0
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
deleted file mode 100644
index e9ca5ac..0000000
--- a/.github/workflows/ubuntu.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-name: Ubuntu CI Test
-
-on:
-  push:
-    branches: [ main, master, dev ]
-  pull_request:
-    branches: [ main, master, dev ]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-    - name: install
-      run: |
-        sudo apt-get install gcovr lcov
-    - name: prepare
-      run: |
-        make prepare
-    - name: configure
-      run: |
-        cmake -H. -Bbuild -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DENABLE_COVERAGE=On
-    - name: building
-      run: |
-        cmake --build build --config Debug --target coverage -j4
-    - name: testing
-      run: |
-        cd build
-        cd tests
-        ./unit_tests
-        bash <(curl -s https://codecov.io/bash)
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
deleted file mode 100644
index 7be18e6..0000000
--- a/.github/workflows/windows.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: Windows CI Test
-
-on:
-  push:
-    branches: [ main, master, dev ]
-  pull_request:
-    branches: [ main, master, dev ]
-
-jobs:
-  build:
-    runs-on: windows-latest
-    steps:
-    - uses: actions/checkout@v2
-    - name: prepare
-      run: |
-        make prepare
-    - name: configure
-      run: |
-        cmake -H"." -Bbuild -T host=x86 -A x64 -DCMAKE_BUILD_TYPE="Debug"
-    - name: building
-      run: |
-        cmake --build build --config Debug --target unit_tests -j4
-    - name: testing
-      run: |
-        cd build
-        cd tests
-        cd Debug
-        .\unit_tests.exe