diff --git a/nn/include/backprop.h b/nn/include/backprop.h index 606f110..29aa941 100644 --- a/nn/include/backprop.h +++ b/nn/include/backprop.h @@ -23,9 +23,9 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, LossFunction loss_func, LossFunctionGrad loss_func_grad); /** @brief Calculate weight gradient for a specific layer. */ -Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, - int total_layers); +Matrix* calculate_weight_gradient(const Cache* cache, size_t layer_index, + size_t total_layers); /** @brief Calculate bias gradient for a specific layer. */ -Matrix* calculate_bias_gradient(const Cache* cache, int layer_index, - int total_layers); +Matrix* calculate_bias_gradient(const Cache* cache, size_t layer_index, + size_t total_layers); diff --git a/nn/include/feedforward.h b/nn/include/feedforward.h index 3037009..fe6d740 100644 --- a/nn/include/feedforward.h +++ b/nn/include/feedforward.h @@ -12,7 +12,7 @@ //============================== /** @brief Allocate and initialize a network with `num_layers` slots. */ -NeuralNetwork* create_network(int num_layers); +NeuralNetwork* create_network(size_t num_layers); /** @brief Free a network and its associated resources. */ void free_network(NeuralNetwork* nn); diff --git a/nn/include/linalg.h b/nn/include/linalg.h index c2fe74d..d760990 100644 --- a/nn/include/linalg.h +++ b/nn/include/linalg.h @@ -1,5 +1,7 @@ #pragma once +#include <stddef.h> + /** * @file linalg.h * @brief Matrix data structure and linear algebra primitives. @@ -20,8 +22,8 @@ // 1D array used, this is way more performant than a 2D array is typedef struct _Matrix { double* matrix_data; - int rows; - int cols; + size_t rows; + size_t cols; } Matrix; //============================== @@ -35,7 +37,7 @@ typedef struct _Matrix { /** @brief Read a matrix from a text file. */ Matrix* read_matrix(const char* filename); /** @brief Create an uninitialized matrix with given shape. */ -Matrix* create_matrix(int rows, int cols); +Matrix* create_matrix(size_t rows, size_t cols); /** @brief Deep copy a matrix. */ Matrix* copy_matrix(const Matrix* m); /** @brief Flatten a matrix along an axis (implementation-specific). */ @@ -57,7 +59,7 @@ int matrix_argmax(Matrix* m); // Matrix Operations //============================ /** @brief Create an identity matrix of size n×n. */ -Matrix* identity_matrix(int n); +Matrix* identity_matrix(size_t n); /** @brief Elementwise addition: result = a + b. */ Matrix* add_matrix(Matrix* a, Matrix* b); /** @brief Elementwise subtraction: result = a - b. */ diff --git a/nn/include/neural_network.h b/nn/include/neural_network.h index b924457..2e75f0f 100644 --- a/nn/include/neural_network.h +++ b/nn/include/neural_network.h @@ -37,8 +37,8 @@ typedef struct { * @brief Neural network composed of sequential fully connected layers. */ typedef struct { - Layer** layers; /**< Array of layer pointers (length = num_layers). */ - int num_layers; /**< Number of layers. */ + Layer** layers; /**< Array of layer pointers (length = num_layers). */ + size_t num_layers; /**< Number of layers. */ /** Caches intermediate forward/backward values. 
*/ Cache* cache; diff --git a/nn/src/activation/activation.c b/nn/src/activation/activation.c index 7f5284f..7d6fbe8 100644 --- a/nn/src/activation/activation.c +++ b/nn/src/activation/activation.c @@ -22,11 +22,12 @@ Matrix* sigmoid(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying sigmoid activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying sigmoid activation to a %zux%zu matrix.", m->rows, + m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = 1.0 / (1.0 + exp(-m->matrix_data[i])); } return result; @@ -34,12 +35,12 @@ Matrix* sigmoid(Matrix* m) { Matrix* sigmoid_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying sigmoid_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying sigmoid_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { double sigmoid_val = 1.0 / (1.0 + exp(-m->matrix_data[i])); result->matrix_data[i] = sigmoid_val * (1.0 - sigmoid_val); } @@ -52,11 +53,11 @@ Matrix* sigmoid_prime(Matrix* m) { Matrix* relu(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying ReLU activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying ReLU activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = m->matrix_data[i]; } else { @@ -68,12 +69,12 @@ Matrix* relu(Matrix* m) { Matrix* relu_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying ReLU_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying ReLU_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1; } else { @@ -89,11 +90,11 @@ Matrix* relu_prime(Matrix* m) { Matrix* tanh_activation(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Tanh activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying Tanh activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = tanh(m->matrix_data[i]); } return result; @@ -101,12 +102,12 @@ Matrix* tanh_activation(Matrix* m) { Matrix* tanh_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Tanh_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Tanh_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - 
for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { double tanh_val = tanh(m->matrix_data[i]); result->matrix_data[i] = 1.0 - pow(tanh_val, 2); } @@ -121,12 +122,12 @@ Matrix* tanh_prime(Matrix* m) { // This will assume the alpha Matrix* leaky_relu(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Leaky ReLU activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Leaky ReLU activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = m->matrix_data[i]; } else { @@ -138,12 +139,12 @@ Matrix* leaky_relu(Matrix* m) { Matrix* leaky_relu_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Leaky ReLU_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Leaky ReLU_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1; } else { @@ -162,13 +163,13 @@ Matrix* leaky_relu_with_alpha(Matrix* m, double leak_parameter) { LOG_INFO( "Applying Leaky ReLU with leak_parameter=%.2f activation function to a " - "%dx%d " + "%zux%zu " "matrix.", leak_parameter, m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = m->matrix_data[i]; } else { @@ -182,12 +183,13 @@ Matrix* leaky_relu_with_alpha(Matrix* m, double leak_parameter) { Matrix* leaky_relu_prime_with_alpha(Matrix* m, double leak_parameter) { ASSERT(m != NULL, "Input matrix for leaky_relu_prime is NULL."); ASSERT(leak_parameter >= 0.0, "Alpha value must be non-negative."); - LOG_INFO("Applying Leaky ReLU with alpha=%.2f derivative to a %dx%d matrix.", - leak_parameter, m->rows, m->cols); + LOG_INFO( + "Applying Leaky ReLU with alpha=%.2f derivative to a %zux%zu matrix.", + leak_parameter, m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1.0; } else { @@ -204,11 +206,11 @@ Matrix* leaky_relu_prime_with_alpha(Matrix* m, double leak_parameter) { Matrix* sign_activation(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Sign activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying Sign activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1.0; } else if (m->matrix_data[i] < 0) { @@ -222,14 +224,14 @@ Matrix* sign_activation(Matrix* 
m) { Matrix* sign_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Sign_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Sign_prime activation to a %zux%zu matrix.", m->rows, m->cols); // The derivative of the sign function is 0 everywhere except at 0, where it // is undefined. For backpropagation, the derivative is commonly approximated // as 0. Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = 0.0; } return result; @@ -241,18 +243,19 @@ Matrix* sign_prime(Matrix* m) { Matrix* identity_activation(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Identity activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying Identity activation to a %zux%zu matrix.", m->rows, + m->cols); return copy_matrix(m); } Matrix* identity_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Identity_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Identity_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = 1.0; } return result; @@ -264,12 +267,12 @@ Matrix* identity_prime(Matrix* m) { Matrix* hard_tanh(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Hard Tanh activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Hard Tanh activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 1.0) { result->matrix_data[i] = 1.0; } else if (m->matrix_data[i] < -1.0) { @@ -283,12 +286,12 @@ Matrix* hard_tanh(Matrix* m) { Matrix* hard_tanh_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Hard Tanh_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Hard Tanh_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > -1.0 && m->matrix_data[i] < 1.0) { result->matrix_data[i] = 1.0; } else { diff --git a/nn/src/cache/cache.c b/nn/src/cache/cache.c index d7bc569..f7352d9 100644 --- a/nn/src/cache/cache.c +++ b/nn/src/cache/cache.c @@ -30,7 +30,7 @@ static unsigned int hash(const char* key) { // not the most secure, but I'll look into a better hash function // later. 
unsigned long long hash = 0; - int i = 0; + size_t i = 0; while (key[i] != '\0') { hash = hash * 31 + key[i]; i++; @@ -48,7 +48,7 @@ Cache* init_cache() { return NULL; } - for (int i = 0; i < HASH_MAP_SIZE; i++) { + for (size_t i = 0; i < HASH_MAP_SIZE; i++) { cache->entries[i] = NULL; } return cache; @@ -101,7 +101,7 @@ void clear_cache(Cache* cache) { if (cache == NULL) { return; } - for (int i = 0; i < HASH_MAP_SIZE; i++) { + for (size_t i = 0; i < HASH_MAP_SIZE; i++) { CacheEntry* current = cache->entries[i]; while (current != NULL) { CacheEntry* to_free = current; diff --git a/nn/src/linalg/io.c b/nn/src/linalg/io.c index c957035..77d4142 100644 --- a/nn/src/linalg/io.c +++ b/nn/src/linalg/io.c @@ -22,7 +22,7 @@ Matrix* read_matrix(const char* filename) { ASSERT(file != NULL, "Failed to open file for matrix loading."); char entry[1024]; - int rows = 0, cols = 0; + size_t rows = 0, cols = 0; if (fgets(entry, sizeof(entry), file) == NULL) { LOG_ERROR("Could not read rows from file: %s", filename); @@ -42,7 +42,7 @@ Matrix* read_matrix(const char* filename) { Matrix* m = create_matrix(rows, cols); - for (int i = 0; i < rows; i++) { + for (size_t i = 0; i < rows; i++) { if (fgets(entry, sizeof(entry), file) == NULL) { LOG_ERROR("Unexpected end of file while reading matrix data."); free_matrix(m); @@ -51,19 +51,19 @@ Matrix* read_matrix(const char* filename) { } char* line_ptr = entry; - for (int j = 0; j < cols; j++) { + for (size_t j = 0; j < cols; j++) { m->matrix_data[i * cols + j] = strtod(line_ptr, &line_ptr); } } - LOG_INFO("Successfully loaded a %dx%d matrix from %s.", m->rows, m->cols, + LOG_INFO("Successfully loaded a %zux%zu matrix from %s.", m->rows, m->cols, filename); fclose(file); return m; } -Matrix* create_matrix(int rows, int cols) { - LOG_INFO("Creating a new matrix of size %dx%d.", rows, cols); +Matrix* create_matrix(size_t rows, size_t cols) { + LOG_INFO("Creating a new matrix of size %zux%zu.", rows, cols); Matrix* matrix = (Matrix*)malloc(sizeof(Matrix)); CHECK_MALLOC(matrix, "Failed to allocate memory for Matrix struct."); @@ -83,7 +83,7 @@ Matrix* create_matrix(int rows, int cols) { Matrix* copy_matrix(const Matrix* m) { ASSERT(m != NULL, "Input matrix for copy is NULL."); - LOG_INFO("Copying a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Copying a %zux%zu matrix.", m->rows, m->cols); Matrix* new_matrix = create_matrix(m->rows, m->cols); size_t total_bytes = m->rows * m->cols * sizeof(double); @@ -98,9 +98,9 @@ Matrix* copy_matrix(const Matrix* m) { Matrix* flatten_column_wise(const Matrix* m) { Matrix* new_matrix = create_matrix(m->rows * m->cols, 1); - int k = 0; - for (int j = 0; j < m->cols; j++) { - for (int i = 0; i < m->rows; i++) { + size_t k = 0; + for (size_t j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { new_matrix->matrix_data[k] = m->matrix_data[i * m->cols + j]; k++; } @@ -129,24 +129,25 @@ Matrix* flatten_matrix(Matrix* m, int axis) { void fill_matrix(Matrix* m, double n) { ASSERT(m != NULL, "Input matrix for fill_matrix is NULL."); - LOG_INFO("Filling a %dx%d matrix with the value %.2f.", m->rows, m->cols, n); + LOG_INFO("Filling a %zux%zu matrix with the value %.2f.", m->rows, m->cols, + n); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { m->matrix_data[i * m->cols + j] = n; } } } void randomize_matrix(Matrix* m, double n) { - LOG_INFO("Randomizing a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Randomizing a %zux%zu 
matrix.", m->rows, m->cols); // Apparently a 1/n or 1/n^2 scaling leads to a vanishing gradient problem double min = -1.0 / sqrt(n); double max = 1.0 / sqrt(n); double range = max - min; - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { double random_value = (double)rand() / (double)RAND_MAX; m->matrix_data[i * m->cols + j] = min + random_value * range; } @@ -169,9 +170,9 @@ void free_matrix(Matrix* m) { void print_matrix(Matrix* m) { ASSERT(m != NULL, "Input matrix for print is NULL."); - LOG_INFO("Printing matrix of size %dx%d.", m->rows, m->cols); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + LOG_INFO("Printing matrix of size %zux%zu.", m->rows, m->cols); + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { printf("%.3f ", m->matrix_data[i * m->cols + j]); } printf("\n"); @@ -180,16 +181,16 @@ void print_matrix(Matrix* m) { void write_matrix(Matrix* m, const char* filename) { ASSERT(m != NULL, "Input matrix for save is NULL."); - LOG_INFO("Saving a %dx%d matrix to file: %s", m->rows, m->cols, filename); + LOG_INFO("Saving a %zux%zu matrix to file: %s", m->rows, m->cols, filename); FILE* file = fopen(filename, "w"); ASSERT(file != NULL, "Failed to open file for saving matrix."); - fprintf(file, "%d\n", m->rows); - fprintf(file, "%d\n", m->cols); + fprintf(file, "%zu\n", m->rows); + fprintf(file, "%zu\n", m->cols); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { fprintf(file, "%.3f ", m->matrix_data[i * m->cols + j]); } fprintf(file, "\n"); @@ -206,9 +207,9 @@ int matrix_argmax(Matrix* m) { double maxValue = INT_MIN; int maxIndex = 0; - for (int i = 0; i < m->rows; i++) { + for (size_t i = 0; i < m->rows; i++) { if (m->matrix_data[i] > maxValue) { - maxIndex = i; + maxIndex = (int)i; maxValue = m->matrix_data[i]; } } diff --git a/nn/src/linalg/operations.c b/nn/src/linalg/operations.c index 4f3f5c2..99a0590 100644 --- a/nn/src/linalg/operations.c +++ b/nn/src/linalg/operations.c @@ -19,13 +19,13 @@ // Functions for Matrix Operations //============================ -Matrix* identity_matrix(int n) { - LOG_INFO("Creating a %dx%d identity matrix.", n, n); +Matrix* identity_matrix(size_t n) { + LOG_INFO("Creating a %zux%zu identity matrix.", n, n); ASSERT(n > 0, "Matrix size must be greater than 0."); Matrix* m = create_matrix(n, n); // Use memset to efficiently initialize all elements to 0 memset(m->matrix_data, 0, n * n * sizeof(double)); - for (int i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { m->matrix_data[i * n + i] = 1.0; } LOG_INFO("Identity matrix created successfully."); @@ -37,11 +37,11 @@ Matrix* add_matrix(Matrix* a, Matrix* b) { ASSERT(a->rows == b->rows && a->cols == b->cols, "Matrices must have the same dimensions for addition."); - LOG_INFO("Adding two %dx%d matrices.", a->rows, a->cols); + LOG_INFO("Adding two %zux%zu matrices.", a->rows, a->cols); Matrix* result = create_matrix(a->rows, a->cols); - int total_elements = a->rows * a->cols; + size_t total_elements = a->rows * a->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = a->matrix_data[i] + b->matrix_data[i]; } @@ -54,11 +54,11 @@ Matrix* subtract_matrix(Matrix* a, Matrix* b) { ASSERT(a->rows == b->rows && a->cols == b->cols, "Matrices must have the same dimensions 
for subtraction."); - LOG_INFO("Subtracting two %dx%d matrices.", a->rows, a->cols); + LOG_INFO("Subtracting two %zux%zu matrices.", a->rows, a->cols); Matrix* result = create_matrix(a->rows, a->cols); - int total_elements = a->rows * a->cols; + size_t total_elements = a->rows * a->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = a->matrix_data[i] - b->matrix_data[i]; } @@ -72,11 +72,11 @@ Matrix* multiply_matrix(Matrix* a, Matrix* b) { "Matrices must have the same dimensions for element-wise " "multiplication."); - LOG_INFO("Multiplying two %dx%d matrices element-wise.", a->rows, a->cols); + LOG_INFO("Multiplying two %zux%zu matrices element-wise.", a->rows, a->cols); Matrix* result = create_matrix(a->rows, a->cols); - int total_elements = a->rows * a->cols; + size_t total_elements = a->rows * a->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = a->matrix_data[i] * b->matrix_data[i]; } @@ -86,12 +86,12 @@ Matrix* multiply_matrix(Matrix* a, Matrix* b) { Matrix* apply_onto_matrix(double (*func)(double), Matrix* m) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Applying a function to each element of a %dx%d matrix.", m->rows, + LOG_INFO("Applying a function to each element of a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = func(m->matrix_data[i]); } @@ -101,11 +101,11 @@ Matrix* apply_onto_matrix(double (*func)(double), Matrix* m) { Matrix* add_scalar_to_matrix(Matrix* m, double n) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Adding scalar %.2f to a %dx%d matrix.", n, m->rows, m->cols); + LOG_INFO("Adding scalar %.2f to a %zux%zu matrix.", n, m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = m->matrix_data[i] + n; } @@ -119,14 +119,14 @@ Matrix* dot_matrix(Matrix* a, Matrix* b) { "The number of columns in the first matrix must equal the number of " "rows in the second matrix for dot product."); - LOG_INFO("Performing dot product on a %dx%d and a %dx%d matrix.", a->rows, + LOG_INFO("Performing dot product on a %zux%zu and a %zux%zu matrix.", a->rows, a->cols, b->rows, b->cols); Matrix* result = create_matrix(a->rows, b->cols); - for (int i = 0; i < a->rows; i++) { - for (int j = 0; j < b->cols; j++) { + for (size_t i = 0; i < a->rows; i++) { + for (size_t j = 0; j < b->cols; j++) { double sum = 0; - for (int k = 0; k < a->cols; k++) { + for (size_t k = 0; k < a->cols; k++) { sum += a->matrix_data[i * a->cols + k] * b->matrix_data[k * b->cols + j]; } @@ -134,35 +134,35 @@ Matrix* dot_matrix(Matrix* a, Matrix* b) { } } - LOG_INFO("Matrix dot product complete. Resulting matrix is %dx%d.", + LOG_INFO("Matrix dot product complete. 
Resulting matrix is %zux%zu.", result->rows, result->cols); return result; } Matrix* transpose_matrix(Matrix* m) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Transposing a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Transposing a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->cols, m->rows); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { result->matrix_data[j * result->cols + i] = m->matrix_data[i * m->cols + j]; } } - LOG_INFO("Matrix transpose complete. Resulting matrix is %dx%d.", + LOG_INFO("Matrix transpose complete. Resulting matrix is %zux%zu.", result->rows, result->cols); return result; } Matrix* scale_matrix(double n, Matrix* m) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Scaling a %dx%d matrix by %.2f.", m->rows, m->cols, n); + LOG_INFO("Scaling a %zux%zu matrix by %.2f.", m->rows, m->cols, n); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = m->matrix_data[i] * n; } diff --git a/nn/src/loss/loss.c b/nn/src/loss/loss.c index a18c25b..75d2015 100644 --- a/nn/src/loss/loss.c +++ b/nn/src/loss/loss.c @@ -54,7 +54,7 @@ double mean_squared_error(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { double diff = y_hat->matrix_data[i] - y->matrix_data[i]; loss += pow(diff, 2); } @@ -69,7 +69,7 @@ double categorical_cross_entropy(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { loss -= y->matrix_data[i] * log(y_hat->matrix_data[i] + EPSILON); } @@ -83,7 +83,7 @@ double mean_absolute_error(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { loss += fabs(y_hat->matrix_data[i] - y->matrix_data[i]); } @@ -97,7 +97,7 @@ double binary_cross_entropy(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { loss -= y->matrix_data[i] * log(y_hat->matrix_data[i] + EPSILON) + (1 - y->matrix_data[i]) * log(1 - y_hat->matrix_data[i] + EPSILON); } @@ -112,7 +112,7 @@ Matrix* mean_squared_error_gradient(const Matrix* y_hat, const Matrix* y) { Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { gradient->matrix_data[i] = 2.0 * (y_hat->matrix_data[i] - y->matrix_data[i]); } @@ -129,7 +129,7 @@ Matrix* categorical_cross_entropy_gradient(const Matrix* y_hat, Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { gradient->matrix_data[i] = -y->matrix_data[i] / (y_hat->matrix_data[i] + EPSILON); } @@ -144,7 +144,7 @@ Matrix* mean_absolute_error_gradient(const 
Matrix* y_hat, const Matrix* y) { Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { if (y_hat->matrix_data[i] > y->matrix_data[i]) { gradient->matrix_data[i] = 1.0; } else if (y_hat->matrix_data[i] < y->matrix_data[i]) { @@ -165,7 +165,7 @@ Matrix* binary_cross_entropy_gradient(const Matrix* y_hat, const Matrix* y) { Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { gradient->matrix_data[i] = (y_hat->matrix_data[i] - y->matrix_data[i]) / (y_hat->matrix_data[i] * (1 - y_hat->matrix_data[i]) + EPSILON); diff --git a/nn/src/neural_network/backprop.c b/nn/src/neural_network/backprop.c index e772907..5316f95 100644 --- a/nn/src/neural_network/backprop.c +++ b/nn/src/neural_network/backprop.c @@ -4,6 +4,7 @@ */ #include "backprop.h" +#include <stdint.h> #include #include @@ -56,11 +57,11 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, ASSERT(y_true != NULL, "Ground truth matrix cannot be NULL."); ASSERT(loss_func_grad != NULL, "Loss gradient function cannot be NULL."); - int last_index = nn->num_layers - 1; + size_t last_index = nn->num_layers - 1; // Get y_hat from cache (activation of last layer) char a_last_key[32]; - sprintf(a_last_key, "a_%d", last_index); + sprintf(a_last_key, "a_%zu", last_index); Matrix* y_hat = get_matrix(nn->cache, a_last_key); ASSERT(y_hat != NULL, "Cached prediction (y_hat) not found."); @@ -70,7 +71,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, // delta for output layer: dL/dz = dL/da .* a'(z) char z_last_key[32]; - sprintf(z_last_key, "z_%d", last_index); + sprintf(z_last_key, "z_%zu", last_index); Matrix* z_last = get_matrix(nn->cache, z_last_key); ASSERT(z_last != NULL, "Cached z for last layer not found."); @@ -80,7 +81,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, ASSERT(delta_last != NULL, "Failed to compute delta for last layer."); char delta_last_key[32]; - sprintf(delta_last_key, "delta_%d", last_index); + sprintf(delta_last_key, "delta_%zu", last_index); put_matrix(nn->cache, delta_last_key, delta_last); // Clean up temporaries for last layer @@ -91,10 +92,10 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, free_matrix(delta_last); // Backpropagate through hidden layers - for (int i = last_index - 1; i >= 0; i--) { + for (size_t i = last_index - 1; i != SIZE_MAX; i--) { // delta_{i} = (delta_{i+1} dot W_{i+1}^T) .* a'_i(z_i) char delta_next_key[32]; - sprintf(delta_next_key, "delta_%d", i + 1); + sprintf(delta_next_key, "delta_%zu", i + 1); Matrix* delta_next = get_matrix(nn->cache, delta_next_key); ASSERT(delta_next != NULL, "Cached delta for next layer not found."); @@ -105,7 +106,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, Matrix* propagated = dot_matrix(delta_next, W_next_T); char z_key[32]; - sprintf(z_key, "z_%d", i); + sprintf(z_key, "z_%zu", i); Matrix* z_i = get_matrix(nn->cache, z_key); ASSERT(z_i != NULL, "Cached z for layer not found."); Matrix* act_prime_i = activation_derivative_for_layer(nn->layers[i], z_i); @@ -114,7 +115,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, ASSERT(delta_i != NULL, "Failed to compute delta for layer."); char delta_i_key[32]; - sprintf(delta_i_key, "delta_%d", i); + sprintf(delta_i_key, "delta_%zu", i); put_matrix(nn->cache, 
delta_i_key, delta_i); // Clean up @@ -127,11 +128,10 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, } } -Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, - int total_layers) { +Matrix* calculate_weight_gradient(const Cache* cache, size_t layer_index, + size_t total_layers) { ASSERT(cache != NULL, "Cache cannot be NULL."); - ASSERT(layer_index >= 0 && layer_index < total_layers, - "layer_index out of bounds."); + ASSERT(layer_index < total_layers, "layer_index out of bounds."); // Get activation of previous layer (or input) Matrix* a_prev = NULL; @@ -139,14 +139,14 @@ Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, a_prev = get_matrix((Cache*)cache, "input"); } else { char a_prev_key[32]; - sprintf(a_prev_key, "a_%d", layer_index - 1); + sprintf(a_prev_key, "a_%zu", layer_index - 1); a_prev = get_matrix((Cache*)cache, a_prev_key); } ASSERT(a_prev != NULL, "Cached previous activation/input not found."); // Get delta for current layer char delta_key[32]; - sprintf(delta_key, "delta_%d", layer_index); + sprintf(delta_key, "delta_%zu", layer_index); Matrix* delta_i = get_matrix((Cache*)cache, delta_key); ASSERT(delta_i != NULL, "Cached delta for layer not found."); @@ -161,14 +161,13 @@ Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, return grad_W; } -Matrix* calculate_bias_gradient(const Cache* cache, int layer_index, - int total_layers) { +Matrix* calculate_bias_gradient(const Cache* cache, size_t layer_index, + size_t total_layers) { ASSERT(cache != NULL, "Cache cannot be NULL."); - ASSERT(layer_index >= 0 && layer_index < total_layers, - "layer_index out of bounds."); + ASSERT(layer_index < total_layers, "layer_index out of bounds."); char delta_key[32]; - sprintf(delta_key, "delta_%d", layer_index); + sprintf(delta_key, "delta_%zu", layer_index); Matrix* delta_i = get_matrix((Cache*)cache, delta_key); ASSERT(delta_i != NULL, "Cached delta for layer not found."); diff --git a/nn/src/neural_network/feedforward.c b/nn/src/neural_network/feedforward.c index b77a97f..7033c38 100644 --- a/nn/src/neural_network/feedforward.c +++ b/nn/src/neural_network/feedforward.c @@ -12,7 +12,7 @@ #include "neural_network.h" #include "utils.h" -NeuralNetwork* create_network(int num_layers) { +NeuralNetwork* create_network(size_t num_layers) { NeuralNetwork* nn = (NeuralNetwork*)malloc(sizeof(NeuralNetwork)); if (nn == NULL) { LOG_ERROR("Memory allocation failed for Neural Network struct."); @@ -43,7 +43,7 @@ void free_network(NeuralNetwork* nn) { } if (nn->layers != NULL) { - for (int i = 0; i < nn->num_layers; i++) { + for (size_t i = 0; i < nn->num_layers; i++) { if (nn->layers[i] != NULL) { if (nn->layers[i]->weights != NULL) { free_matrix(nn->layers[i]->weights); @@ -73,14 +73,14 @@ Matrix* feedforward(NeuralNetwork* nn, const Matrix* input) { put_matrix(nn->cache, "input", current_output); - for (int i = 0; i < nn->num_layers; i++) { + for (size_t i = 0; i < nn->num_layers; i++) { Matrix* z_linear = dot_matrix(current_output, nn->layers[i]->weights); // Bias add returns a new matrix; capture it to avoid dropping the result. Matrix* z = add_matrix(z_linear, nn->layers[i]->bias); // Cache the intermediate pre-activation value (z). 
char z_key[32]; - sprintf(z_key, "z_%d", i); + sprintf(z_key, "z_%zu", i); put_matrix(nn->cache, z_key, z); Matrix* a = NULL; @@ -92,7 +92,7 @@ Matrix* feedforward(NeuralNetwork* nn, const Matrix* input) { } char a_key[32]; - sprintf(a_key, "a_%d", i); + sprintf(a_key, "a_%zu", i); put_matrix(nn->cache, a_key, a); free_matrix(z_linear);