diff --git a/nn/include/backprop.h b/nn/include/backprop.h index 606f110..29aa941 100644 --- a/nn/include/backprop.h +++ b/nn/include/backprop.h @@ -23,9 +23,9 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, LossFunction loss_func, LossFunctionGrad loss_func_grad); /** @brief Calculate weight gradient for a specific layer. */ -Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, - int total_layers); +Matrix* calculate_weight_gradient(const Cache* cache, size_t layer_index, + size_t total_layers); /** @brief Calculate bias gradient for a specific layer. */ -Matrix* calculate_bias_gradient(const Cache* cache, int layer_index, - int total_layers); +Matrix* calculate_bias_gradient(const Cache* cache, size_t layer_index, + size_t total_layers); diff --git a/nn/include/feedforward.h b/nn/include/feedforward.h index 3037009..fe6d740 100644 --- a/nn/include/feedforward.h +++ b/nn/include/feedforward.h @@ -12,7 +12,7 @@ //============================== /** @brief Allocate and initialize a network with `num_layers` slots. */ -NeuralNetwork* create_network(int num_layers); +NeuralNetwork* create_network(size_t num_layers); /** @brief Free a network and its associated resources. */ void free_network(NeuralNetwork* nn); diff --git a/nn/include/linalg.h b/nn/include/linalg.h index c2fe74d..d760990 100644 --- a/nn/include/linalg.h +++ b/nn/include/linalg.h @@ -1,5 +1,7 @@ #pragma once +#include <stddef.h> + /** * @file linalg.h * @brief Matrix data structure and linear algebra primitives. @@ -20,8 +22,8 @@ // 1D array used, this is way more performant than a 2D array is typedef struct _Matrix { double* matrix_data; - int rows; - int cols; + size_t rows; + size_t cols; } Matrix; //============================== @@ -35,7 +37,7 @@ typedef struct _Matrix { /** @brief Read a matrix from a text file. */ Matrix* read_matrix(const char* filename); /** @brief Create an uninitialized matrix with given shape. */ -Matrix* create_matrix(int rows, int cols); +Matrix* create_matrix(size_t rows, size_t cols); /** @brief Deep copy a matrix. */ Matrix* copy_matrix(const Matrix* m); /** @brief Flatten a matrix along an axis (implementation-specific). */ @@ -57,7 +59,7 @@ int matrix_argmax(Matrix* m); // Matrix Operations //============================ /** @brief Create an identity matrix of size n×n. */ -Matrix* identity_matrix(int n); +Matrix* identity_matrix(size_t n); /** @brief Elementwise addition: result = a + b. */ Matrix* add_matrix(Matrix* a, Matrix* b); /** @brief Elementwise subtraction: result = a - b. */ diff --git a/nn/include/neural_network.h b/nn/include/neural_network.h index b924457..2e75f0f 100644 --- a/nn/include/neural_network.h +++ b/nn/include/neural_network.h @@ -37,8 +37,8 @@ typedef struct { * @brief Neural network composed of sequential fully connected layers. */ typedef struct { - Layer** layers; /**< Array of layer pointers (length = num_layers). */ - int num_layers; /**< Number of layers. */ + Layer** layers; /**< Array of layer pointers (length = num_layers). */ + size_t num_layers; /**< Number of layers. */ /** Caches intermediate forward/backward values. 
*/ Cache* cache; diff --git a/nn/src/activation/activation.c b/nn/src/activation/activation.c index 7f5284f..7d6fbe8 100644 --- a/nn/src/activation/activation.c +++ b/nn/src/activation/activation.c @@ -22,11 +22,12 @@ Matrix* sigmoid(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying sigmoid activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying sigmoid activation to a %zux%zu matrix.", m->rows, + m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = 1.0 / (1.0 + exp(-m->matrix_data[i])); } return result; @@ -34,12 +35,12 @@ Matrix* sigmoid(Matrix* m) { Matrix* sigmoid_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying sigmoid_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying sigmoid_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { double sigmoid_val = 1.0 / (1.0 + exp(-m->matrix_data[i])); result->matrix_data[i] = sigmoid_val * (1.0 - sigmoid_val); } @@ -52,11 +53,11 @@ Matrix* sigmoid_prime(Matrix* m) { Matrix* relu(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying ReLU activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying ReLU activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = m->matrix_data[i]; } else { @@ -68,12 +69,12 @@ Matrix* relu(Matrix* m) { Matrix* relu_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying ReLU_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying ReLU_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1; } else { @@ -89,11 +90,11 @@ Matrix* relu_prime(Matrix* m) { Matrix* tanh_activation(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Tanh activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying Tanh activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = tanh(m->matrix_data[i]); } return result; @@ -101,12 +102,12 @@ Matrix* tanh_activation(Matrix* m) { Matrix* tanh_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Tanh_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Tanh_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - 
for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { double tanh_val = tanh(m->matrix_data[i]); result->matrix_data[i] = 1.0 - pow(tanh_val, 2); } @@ -121,12 +122,12 @@ Matrix* tanh_prime(Matrix* m) { // This will assume the alpha Matrix* leaky_relu(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Leaky ReLU activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Leaky ReLU activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = m->matrix_data[i]; } else { @@ -138,12 +139,12 @@ Matrix* leaky_relu(Matrix* m) { Matrix* leaky_relu_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Leaky ReLU_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Leaky ReLU_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1; } else { @@ -162,13 +163,13 @@ Matrix* leaky_relu_with_alpha(Matrix* m, double leak_parameter) { LOG_INFO( "Applying Leaky ReLU with leak_parameter=%.2f activation function to a " - "%dx%d " + "%zux%zu " "matrix.", leak_parameter, m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = m->matrix_data[i]; } else { @@ -182,12 +183,13 @@ Matrix* leaky_relu_with_alpha(Matrix* m, double leak_parameter) { Matrix* leaky_relu_prime_with_alpha(Matrix* m, double leak_parameter) { ASSERT(m != NULL, "Input matrix for leaky_relu_prime is NULL."); ASSERT(leak_parameter >= 0.0, "Alpha value must be non-negative."); - LOG_INFO("Applying Leaky ReLU with alpha=%.2f derivative to a %dx%d matrix.", - leak_parameter, m->rows, m->cols); + LOG_INFO( + "Applying Leaky ReLU with alpha=%.2f derivative to a %zux%zu matrix.", + leak_parameter, m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1.0; } else { @@ -204,11 +206,11 @@ Matrix* leaky_relu_prime_with_alpha(Matrix* m, double leak_parameter) { Matrix* sign_activation(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Sign activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying Sign activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 0) { result->matrix_data[i] = 1.0; } else if (m->matrix_data[i] < 0) { @@ -222,14 +224,14 @@ Matrix* sign_activation(Matrix* 
m) { Matrix* sign_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Sign_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Sign_prime activation to a %zux%zu matrix.", m->rows, m->cols); // The derivative of the sign function is 0 everywhere except at 0, where it // is undefined. For backpropagation, the derivative is commonly approximated // as 0. Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = 0.0; } return result; @@ -241,18 +243,19 @@ Matrix* sign_prime(Matrix* m) { Matrix* identity_activation(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Identity activation to a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Applying Identity activation to a %zux%zu matrix.", m->rows, + m->cols); return copy_matrix(m); } Matrix* identity_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Identity_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Identity_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = 1.0; } return result; @@ -264,12 +267,12 @@ Matrix* identity_prime(Matrix* m) { Matrix* hard_tanh(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Hard Tanh activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Hard Tanh activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > 1.0) { result->matrix_data[i] = 1.0; } else if (m->matrix_data[i] < -1.0) { @@ -283,12 +286,12 @@ Matrix* hard_tanh(Matrix* m) { Matrix* hard_tanh_prime(Matrix* m) { ASSERT(m != NULL, "Input matrix is NULL."); - LOG_INFO("Applying Hard Tanh_prime activation to a %dx%d matrix.", m->rows, + LOG_INFO("Applying Hard Tanh_prime activation to a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + size_t total_elements = m->rows * m->cols; + for (size_t i = 0; i < total_elements; i++) { if (m->matrix_data[i] > -1.0 && m->matrix_data[i] < 1.0) { result->matrix_data[i] = 1.0; } else { diff --git a/nn/src/cache/cache.c b/nn/src/cache/cache.c index d7bc569..f7352d9 100644 --- a/nn/src/cache/cache.c +++ b/nn/src/cache/cache.c @@ -30,7 +30,7 @@ static unsigned int hash(const char* key) { // not the most secure, but I'll look into a better hash function // later. 
unsigned long long hash = 0; - int i = 0; + size_t i = 0; while (key[i] != '\0') { hash = hash * 31 + key[i]; i++; @@ -48,7 +48,7 @@ Cache* init_cache() { return NULL; } - for (int i = 0; i < HASH_MAP_SIZE; i++) { + for (size_t i = 0; i < HASH_MAP_SIZE; i++) { cache->entries[i] = NULL; } return cache; @@ -101,7 +101,7 @@ void clear_cache(Cache* cache) { if (cache == NULL) { return; } - for (int i = 0; i < HASH_MAP_SIZE; i++) { + for (size_t i = 0; i < HASH_MAP_SIZE; i++) { CacheEntry* current = cache->entries[i]; while (current != NULL) { CacheEntry* to_free = current; diff --git a/nn/src/linalg/io.c b/nn/src/linalg/io.c index c957035..77d4142 100644 --- a/nn/src/linalg/io.c +++ b/nn/src/linalg/io.c @@ -22,7 +22,7 @@ Matrix* read_matrix(const char* filename) { ASSERT(file != NULL, "Failed to open file for matrix loading."); char entry[1024]; - int rows = 0, cols = 0; + size_t rows = 0, cols = 0; if (fgets(entry, sizeof(entry), file) == NULL) { LOG_ERROR("Could not read rows from file: %s", filename); @@ -42,7 +42,7 @@ Matrix* read_matrix(const char* filename) { Matrix* m = create_matrix(rows, cols); - for (int i = 0; i < rows; i++) { + for (size_t i = 0; i < rows; i++) { if (fgets(entry, sizeof(entry), file) == NULL) { LOG_ERROR("Unexpected end of file while reading matrix data."); free_matrix(m); @@ -51,19 +51,19 @@ Matrix* read_matrix(const char* filename) { } char* line_ptr = entry; - for (int j = 0; j < cols; j++) { + for (size_t j = 0; j < cols; j++) { m->matrix_data[i * cols + j] = strtod(line_ptr, &line_ptr); } } - LOG_INFO("Successfully loaded a %dx%d matrix from %s.", m->rows, m->cols, + LOG_INFO("Successfully loaded a %zux%zu matrix from %s.", m->rows, m->cols, filename); fclose(file); return m; } -Matrix* create_matrix(int rows, int cols) { - LOG_INFO("Creating a new matrix of size %dx%d.", rows, cols); +Matrix* create_matrix(size_t rows, size_t cols) { + LOG_INFO("Creating a new matrix of size %zux%zu.", rows, cols); Matrix* matrix = (Matrix*)malloc(sizeof(Matrix)); CHECK_MALLOC(matrix, "Failed to allocate memory for Matrix struct."); @@ -83,7 +83,7 @@ Matrix* create_matrix(int rows, int cols) { Matrix* copy_matrix(const Matrix* m) { ASSERT(m != NULL, "Input matrix for copy is NULL."); - LOG_INFO("Copying a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Copying a %zux%zu matrix.", m->rows, m->cols); Matrix* new_matrix = create_matrix(m->rows, m->cols); size_t total_bytes = m->rows * m->cols * sizeof(double); @@ -98,9 +98,9 @@ Matrix* copy_matrix(const Matrix* m) { Matrix* flatten_column_wise(const Matrix* m) { Matrix* new_matrix = create_matrix(m->rows * m->cols, 1); - int k = 0; - for (int j = 0; j < m->cols; j++) { - for (int i = 0; i < m->rows; i++) { + size_t k = 0; + for (size_t j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { new_matrix->matrix_data[k] = m->matrix_data[i * m->cols + j]; k++; } @@ -129,24 +129,25 @@ Matrix* flatten_matrix(Matrix* m, int axis) { void fill_matrix(Matrix* m, double n) { ASSERT(m != NULL, "Input matrix for fill_matrix is NULL."); - LOG_INFO("Filling a %dx%d matrix with the value %.2f.", m->rows, m->cols, n); + LOG_INFO("Filling a %zux%zu matrix with the value %.2f.", m->rows, m->cols, + n); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { m->matrix_data[i * m->cols + j] = n; } } } void randomize_matrix(Matrix* m, double n) { - LOG_INFO("Randomizing a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Randomizing a %zux%zu 
matrix.", m->rows, m->cols); // Apparently a 1/n or 1/n^2 scaling leads to a vanishing gradient problem double min = -1.0 / sqrt(n); double max = 1.0 / sqrt(n); double range = max - min; - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { double random_value = (double)rand() / (double)RAND_MAX; m->matrix_data[i * m->cols + j] = min + random_value * range; } @@ -169,9 +170,9 @@ void free_matrix(Matrix* m) { void print_matrix(Matrix* m) { ASSERT(m != NULL, "Input matrix for print is NULL."); - LOG_INFO("Printing matrix of size %dx%d.", m->rows, m->cols); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + LOG_INFO("Printing matrix of size %zux%zu.", m->rows, m->cols); + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { printf("%.3f ", m->matrix_data[i * m->cols + j]); } printf("\n"); @@ -180,16 +181,16 @@ void print_matrix(Matrix* m) { void write_matrix(Matrix* m, const char* filename) { ASSERT(m != NULL, "Input matrix for save is NULL."); - LOG_INFO("Saving a %dx%d matrix to file: %s", m->rows, m->cols, filename); + LOG_INFO("Saving a %zux%zu matrix to file: %s", m->rows, m->cols, filename); FILE* file = fopen(filename, "w"); ASSERT(file != NULL, "Failed to open file for saving matrix."); - fprintf(file, "%d\n", m->rows); - fprintf(file, "%d\n", m->cols); + fprintf(file, "%zu\n", m->rows); + fprintf(file, "%zu\n", m->cols); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { fprintf(file, "%.3f ", m->matrix_data[i * m->cols + j]); } fprintf(file, "\n"); @@ -206,9 +207,9 @@ int matrix_argmax(Matrix* m) { double maxValue = INT_MIN; int maxIndex = 0; - for (int i = 0; i < m->rows; i++) { + for (size_t i = 0; i < m->rows; i++) { if (m->matrix_data[i] > maxValue) { - maxIndex = i; + maxIndex = (int)i; maxValue = m->matrix_data[i]; } } diff --git a/nn/src/linalg/operations.c b/nn/src/linalg/operations.c index 4f3f5c2..99a0590 100644 --- a/nn/src/linalg/operations.c +++ b/nn/src/linalg/operations.c @@ -19,13 +19,13 @@ // Functions for Matrix Operations //============================ -Matrix* identity_matrix(int n) { - LOG_INFO("Creating a %dx%d identity matrix.", n, n); +Matrix* identity_matrix(size_t n) { + LOG_INFO("Creating a %zux%zu identity matrix.", n, n); ASSERT(n > 0, "Matrix size must be greater than 0."); Matrix* m = create_matrix(n, n); // Use memset to efficiently initialize all elements to 0 memset(m->matrix_data, 0, n * n * sizeof(double)); - for (int i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { m->matrix_data[i * n + i] = 1.0; } LOG_INFO("Identity matrix created successfully."); @@ -37,11 +37,11 @@ Matrix* add_matrix(Matrix* a, Matrix* b) { ASSERT(a->rows == b->rows && a->cols == b->cols, "Matrices must have the same dimensions for addition."); - LOG_INFO("Adding two %dx%d matrices.", a->rows, a->cols); + LOG_INFO("Adding two %zux%zu matrices.", a->rows, a->cols); Matrix* result = create_matrix(a->rows, a->cols); - int total_elements = a->rows * a->cols; + size_t total_elements = a->rows * a->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = a->matrix_data[i] + b->matrix_data[i]; } @@ -54,11 +54,11 @@ Matrix* subtract_matrix(Matrix* a, Matrix* b) { ASSERT(a->rows == b->rows && a->cols == b->cols, "Matrices must have the same dimensions 
for subtraction."); - LOG_INFO("Subtracting two %dx%d matrices.", a->rows, a->cols); + LOG_INFO("Subtracting two %zux%zu matrices.", a->rows, a->cols); Matrix* result = create_matrix(a->rows, a->cols); - int total_elements = a->rows * a->cols; + size_t total_elements = a->rows * a->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = a->matrix_data[i] - b->matrix_data[i]; } @@ -72,11 +72,11 @@ Matrix* multiply_matrix(Matrix* a, Matrix* b) { "Matrices must have the same dimensions for element-wise " "multiplication."); - LOG_INFO("Multiplying two %dx%d matrices element-wise.", a->rows, a->cols); + LOG_INFO("Multiplying two %zux%zu matrices element-wise.", a->rows, a->cols); Matrix* result = create_matrix(a->rows, a->cols); - int total_elements = a->rows * a->cols; + size_t total_elements = a->rows * a->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = a->matrix_data[i] * b->matrix_data[i]; } @@ -86,12 +86,12 @@ Matrix* multiply_matrix(Matrix* a, Matrix* b) { Matrix* apply_onto_matrix(double (*func)(double), Matrix* m) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Applying a function to each element of a %dx%d matrix.", m->rows, + LOG_INFO("Applying a function to each element of a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = func(m->matrix_data[i]); } @@ -101,11 +101,11 @@ Matrix* apply_onto_matrix(double (*func)(double), Matrix* m) { Matrix* add_scalar_to_matrix(Matrix* m, double n) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Adding scalar %.2f to a %dx%d matrix.", n, m->rows, m->cols); + LOG_INFO("Adding scalar %.2f to a %zux%zu matrix.", n, m->rows, m->cols); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = m->matrix_data[i] + n; } @@ -119,14 +119,14 @@ Matrix* dot_matrix(Matrix* a, Matrix* b) { "The number of columns in the first matrix must equal the number of " "rows in the second matrix for dot product."); - LOG_INFO("Performing dot product on a %dx%d and a %dx%d matrix.", a->rows, + LOG_INFO("Performing dot product on a %zux%zu and a %zux%zu matrix.", a->rows, a->cols, b->rows, b->cols); Matrix* result = create_matrix(a->rows, b->cols); - for (int i = 0; i < a->rows; i++) { - for (int j = 0; j < b->cols; j++) { + for (size_t i = 0; i < a->rows; i++) { + for (size_t j = 0; j < b->cols; j++) { double sum = 0; - for (int k = 0; k < a->cols; k++) { + for (size_t k = 0; k < a->cols; k++) { sum += a->matrix_data[i * a->cols + k] * b->matrix_data[k * b->cols + j]; } @@ -134,35 +134,35 @@ Matrix* dot_matrix(Matrix* a, Matrix* b) { } } - LOG_INFO("Matrix dot product complete. Resulting matrix is %dx%d.", + LOG_INFO("Matrix dot product complete. 
Resulting matrix is %zux%zu.", result->rows, result->cols); return result; } Matrix* transpose_matrix(Matrix* m) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Transposing a %dx%d matrix.", m->rows, m->cols); + LOG_INFO("Transposing a %zux%zu matrix.", m->rows, m->cols); Matrix* result = create_matrix(m->cols, m->rows); - for (int i = 0; i < m->rows; i++) { - for (int j = 0; j < m->cols; j++) { + for (size_t i = 0; i < m->rows; i++) { + for (size_t j = 0; j < m->cols; j++) { result->matrix_data[j * result->cols + i] = m->matrix_data[i * m->cols + j]; } } - LOG_INFO("Matrix transpose complete. Resulting matrix is %dx%d.", + LOG_INFO("Matrix transpose complete. Resulting matrix is %zux%zu.", result->rows, result->cols); return result; } Matrix* scale_matrix(double n, Matrix* m) { ASSERT(m != NULL, "Input matrix cannot be NULL."); - LOG_INFO("Scaling a %dx%d matrix by %.2f.", m->rows, m->cols, n); + LOG_INFO("Scaling a %zux%zu matrix by %.2f.", m->rows, m->cols, n); Matrix* result = create_matrix(m->rows, m->cols); - int total_elements = m->rows * m->cols; + size_t total_elements = m->rows * m->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { result->matrix_data[i] = m->matrix_data[i] * n; } diff --git a/nn/src/loss/loss.c b/nn/src/loss/loss.c index a18c25b..75d2015 100644 --- a/nn/src/loss/loss.c +++ b/nn/src/loss/loss.c @@ -54,7 +54,7 @@ double mean_squared_error(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { double diff = y_hat->matrix_data[i] - y->matrix_data[i]; loss += pow(diff, 2); } @@ -69,7 +69,7 @@ double categorical_cross_entropy(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { loss -= y->matrix_data[i] * log(y_hat->matrix_data[i] + EPSILON); } @@ -83,7 +83,7 @@ double mean_absolute_error(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { loss += fabs(y_hat->matrix_data[i] - y->matrix_data[i]); } @@ -97,7 +97,7 @@ double binary_cross_entropy(const Matrix* y_hat, const Matrix* y) { double loss = 0.0; int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { loss -= y->matrix_data[i] * log(y_hat->matrix_data[i] + EPSILON) + (1 - y->matrix_data[i]) * log(1 - y_hat->matrix_data[i] + EPSILON); } @@ -112,7 +112,7 @@ Matrix* mean_squared_error_gradient(const Matrix* y_hat, const Matrix* y) { Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { gradient->matrix_data[i] = 2.0 * (y_hat->matrix_data[i] - y->matrix_data[i]); } @@ -129,7 +129,7 @@ Matrix* categorical_cross_entropy_gradient(const Matrix* y_hat, Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { gradient->matrix_data[i] = -y->matrix_data[i] / (y_hat->matrix_data[i] + EPSILON); } @@ -144,7 +144,7 @@ Matrix* mean_absolute_error_gradient(const 
Matrix* y_hat, const Matrix* y) { Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { if (y_hat->matrix_data[i] > y->matrix_data[i]) { gradient->matrix_data[i] = 1.0; } else if (y_hat->matrix_data[i] < y->matrix_data[i]) { @@ -165,7 +165,7 @@ Matrix* binary_cross_entropy_gradient(const Matrix* y_hat, const Matrix* y) { Matrix* gradient = create_matrix(y_hat->rows, y_hat->cols); int total_elements = y_hat->rows * y_hat->cols; - for (int i = 0; i < total_elements; i++) { + for (size_t i = 0; i < total_elements; i++) { gradient->matrix_data[i] = (y_hat->matrix_data[i] - y->matrix_data[i]) / (y_hat->matrix_data[i] * (1 - y_hat->matrix_data[i]) + EPSILON); diff --git a/nn/src/neural_network/backprop.c b/nn/src/neural_network/backprop.c index e772907..5316f95 100644 --- a/nn/src/neural_network/backprop.c +++ b/nn/src/neural_network/backprop.c @@ -4,6 +4,7 @@ */ #include "backprop.h" +#include <stdint.h> #include #include @@ -56,11 +57,11 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, ASSERT(y_true != NULL, "Ground truth matrix cannot be NULL."); ASSERT(loss_func_grad != NULL, "Loss gradient function cannot be NULL."); - int last_index = nn->num_layers - 1; + size_t last_index = nn->num_layers - 1; // Get y_hat from cache (activation of last layer) char a_last_key[32]; - sprintf(a_last_key, "a_%d", last_index); + sprintf(a_last_key, "a_%zu", last_index); Matrix* y_hat = get_matrix(nn->cache, a_last_key); ASSERT(y_hat != NULL, "Cached prediction (y_hat) not found."); @@ -70,7 +71,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, // delta for output layer: dL/dz = dL/da .* a'(z) char z_last_key[32]; - sprintf(z_last_key, "z_%d", last_index); + sprintf(z_last_key, "z_%zu", last_index); Matrix* z_last = get_matrix(nn->cache, z_last_key); ASSERT(z_last != NULL, "Cached z for last layer not found."); @@ -80,7 +81,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, ASSERT(delta_last != NULL, "Failed to compute delta for last layer."); char delta_last_key[32]; - sprintf(delta_last_key, "delta_%d", last_index); + sprintf(delta_last_key, "delta_%zu", last_index); put_matrix(nn->cache, delta_last_key, delta_last); // Clean up temporaries for last layer @@ -91,10 +92,10 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, free_matrix(delta_last); // Backpropagate through hidden layers - for (int i = last_index - 1; i >= 0; i--) { + for (size_t i = last_index - 1; i != SIZE_MAX; i--) { // delta_{i} = (delta_{i+1} dot W_{i+1}^T) .* a'_i(z_i) char delta_next_key[32]; - sprintf(delta_next_key, "delta_%d", i + 1); + sprintf(delta_next_key, "delta_%zu", i + 1); Matrix* delta_next = get_matrix(nn->cache, delta_next_key); ASSERT(delta_next != NULL, "Cached delta for next layer not found."); @@ -105,7 +106,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, Matrix* propagated = dot_matrix(delta_next, W_next_T); char z_key[32]; - sprintf(z_key, "z_%d", i); + sprintf(z_key, "z_%zu", i); Matrix* z_i = get_matrix(nn->cache, z_key); ASSERT(z_i != NULL, "Cached z for layer not found."); Matrix* act_prime_i = activation_derivative_for_layer(nn->layers[i], z_i); @@ -114,7 +115,7 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, ASSERT(delta_i != NULL, "Failed to compute delta for layer."); char delta_i_key[32]; - sprintf(delta_i_key, "delta_%d", i); + sprintf(delta_i_key, "delta_%zu", i); put_matrix(nn->cache, 
delta_i_key, delta_i); // Clean up @@ -127,11 +128,10 @@ void backpropagate(NeuralNetwork* nn, const Matrix* y_true, } } -Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, - int total_layers) { +Matrix* calculate_weight_gradient(const Cache* cache, size_t layer_index, + size_t total_layers) { ASSERT(cache != NULL, "Cache cannot be NULL."); - ASSERT(layer_index >= 0 && layer_index < total_layers, - "layer_index out of bounds."); + ASSERT(layer_index < total_layers, "layer_index out of bounds."); // Get activation of previous layer (or input) Matrix* a_prev = NULL; @@ -139,14 +139,14 @@ Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, a_prev = get_matrix((Cache*)cache, "input"); } else { char a_prev_key[32]; - sprintf(a_prev_key, "a_%d", layer_index - 1); + sprintf(a_prev_key, "a_%zu", layer_index - 1); a_prev = get_matrix((Cache*)cache, a_prev_key); } ASSERT(a_prev != NULL, "Cached previous activation/input not found."); // Get delta for current layer char delta_key[32]; - sprintf(delta_key, "delta_%d", layer_index); + sprintf(delta_key, "delta_%zu", layer_index); Matrix* delta_i = get_matrix((Cache*)cache, delta_key); ASSERT(delta_i != NULL, "Cached delta for layer not found."); @@ -161,14 +161,13 @@ Matrix* calculate_weight_gradient(const Cache* cache, int layer_index, return grad_W; } -Matrix* calculate_bias_gradient(const Cache* cache, int layer_index, - int total_layers) { +Matrix* calculate_bias_gradient(const Cache* cache, size_t layer_index, + size_t total_layers) { ASSERT(cache != NULL, "Cache cannot be NULL."); - ASSERT(layer_index >= 0 && layer_index < total_layers, - "layer_index out of bounds."); + ASSERT(layer_index < total_layers, "layer_index out of bounds."); char delta_key[32]; - sprintf(delta_key, "delta_%d", layer_index); + sprintf(delta_key, "delta_%zu", layer_index); Matrix* delta_i = get_matrix((Cache*)cache, delta_key); ASSERT(delta_i != NULL, "Cached delta for layer not found."); diff --git a/nn/src/neural_network/feedforward.c b/nn/src/neural_network/feedforward.c index b77a97f..7033c38 100644 --- a/nn/src/neural_network/feedforward.c +++ b/nn/src/neural_network/feedforward.c @@ -12,7 +12,7 @@ #include "neural_network.h" #include "utils.h" -NeuralNetwork* create_network(int num_layers) { +NeuralNetwork* create_network(size_t num_layers) { NeuralNetwork* nn = (NeuralNetwork*)malloc(sizeof(NeuralNetwork)); if (nn == NULL) { LOG_ERROR("Memory allocation failed for Neural Network struct."); @@ -43,7 +43,7 @@ void free_network(NeuralNetwork* nn) { } if (nn->layers != NULL) { - for (int i = 0; i < nn->num_layers; i++) { + for (size_t i = 0; i < nn->num_layers; i++) { if (nn->layers[i] != NULL) { if (nn->layers[i]->weights != NULL) { free_matrix(nn->layers[i]->weights); @@ -73,14 +73,14 @@ Matrix* feedforward(NeuralNetwork* nn, const Matrix* input) { put_matrix(nn->cache, "input", current_output); - for (int i = 0; i < nn->num_layers; i++) { + for (size_t i = 0; i < nn->num_layers; i++) { Matrix* z_linear = dot_matrix(current_output, nn->layers[i]->weights); // Bias add returns a new matrix; capture it to avoid dropping the result. Matrix* z = add_matrix(z_linear, nn->layers[i]->bias); // Cache the intermediate pre-activation value (z). 
char z_key[32]; - sprintf(z_key, "z_%d", i); + sprintf(z_key, "z_%zu", i); put_matrix(nn->cache, z_key, z); Matrix* a = NULL; @@ -92,7 +92,7 @@ Matrix* feedforward(NeuralNetwork* nn, const Matrix* input) { } char a_key[32]; - sprintf(a_key, "a_%d", i); + sprintf(a_key, "a_%zu", i); put_matrix(nn->cache, a_key, a); free_matrix(z_linear);