From c1cfd30ffe7cfd217a4a97cf9b94ae1ae5f518d2 Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Thu, 26 Sep 2024 01:29:10 -0600 Subject: [PATCH 01/10] Implemented covariance calculation + Covariance Test cases + NDArray::cov function in numpower to expose to PHP + Covariance implementation without Hstack since not in branch --- numpower.c | 47 +++++++++++++++ src/ndmath/statistics.c | 60 +++++++++++++++++++ src/ndmath/statistics.h | 1 + tests/math/048-ndarray-cov.phpt | 101 ++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+) create mode 100644 tests/math/048-ndarray-cov.phpt diff --git a/numpower.c b/numpower.c index fcc4989..c64b24d 100644 --- a/numpower.c +++ b/numpower.c @@ -2894,6 +2894,52 @@ PHP_METHOD(NDArray, variance) { RETURN_NDARRAY(rtn, return_value); } +/** + * NDArray::cov + * + * @param execute_data + * @param return_value + */ +ZEND_BEGIN_ARG_INFO_EX(arginfo_ndarray_cov, 0, 0, 1) +ZEND_ARG_INFO(0, array) +ZEND_ARG_INFO(0, axis) +ZEND_END_ARG_INFO() +PHP_METHOD(NDArray, cov) { + NDArray *rtn = NULL; + zval *array; + long axis; + int i_axis; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_ZVAL(array) + ZEND_PARSE_PARAMETERS_END(); + i_axis = (int)axis; + NDArray *nda = ZVAL_TO_NDARRAY(array); + if (nda == NULL) { + return; + } + + if (NDArray_DEVICE(nda) == NDARRAY_DEVICE_CPU) { + rtn = NDArray_cov(nda); + } else { +#ifdef HAVE_CUBLAS + if (ZEND_NUM_ARGS() == 1) { + rtn = NDArray_cov(nda); + } else { + rtn = single_reduce(nda, &i_axis, NDArray_Mean_Float); + } +#else + zend_throw_error(NULL, "GPU operations unavailable. CUBLAS not detected."); +#endif + } + if (rtn == NULL) { + return; + } + if (Z_TYPE_P(array) == IS_ARRAY) { + NDArray_FREE(nda); + } + RETURN_NDARRAY(rtn, return_value); +} + /** * NDArray::ceil * @@ -5180,6 +5226,7 @@ static const zend_function_entry class_NDArray_methods[] = { ZEND_ME(NDArray, average, arginfo_ndarray_average, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) ZEND_ME(NDArray, std, arginfo_ndarray_std, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) ZEND_ME(NDArray, quantile, arginfo_ndarray_quantile, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) + ZEND_ME(NDArray, cov, arginfo_ndarray_cov, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) // ARITHMETICS ZEND_ME(NDArray, add, arginfo_ndarray_add, ZEND_ACC_PUBLIC | ZEND_ACC_STATIC) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index 5bd8aff..b232ce0 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -3,6 +3,8 @@ #include "string.h" #include "../initializers.h" #include "arithmetics.h" +#include "../manipulation.h" +#include "linalg.h" // Comparison function for sorting int compare_quantile(const void* a, const void* b) { @@ -151,4 +153,62 @@ NDArray_Average(NDArray *a, NDArray *weights) { NDArray_FREE(m_weights); } return rtn; +} + +/** + * NDArray::cov + * + * @param a + * @return + */ +NDArray *NDArray_cov(NDArray *a) +{ + if (a == NULL || NDArray_NUMELEMENTS(a) == 0) + { + zend_throw_error(NULL, "Input cannot be null or empty."); + return NULL; + } + if (NDArray_NDIM(a) != 2 || NDArray_SHAPE(a)[1] == 1) + { + zend_throw_error(NULL, "Input must be a 2D NDArray."); + return NULL; + } + + int cols = NDArray_SHAPE(a)[0]; + int rows = NDArray_SHAPE(a)[1]; + + float *a_data = (float *)emalloc(sizeof(float) * NDArray_NUMELEMENTS(a)); + memcpy(a_data, NDArray_FDATA(a), rows * cols * sizeof(float)); + + int *col_shape = emalloc(sizeof(int) * 2); + col_shape[0] = rows; + col_shape[1] = 1; + + NDArray **norm_vectors = emalloc(sizeof(NDArray *) * cols); + for (int i = 0; i < cols; i++) + { + NDArray *col_vector = NDArray_Zeros(col_shape, 2, NDArray_TYPE(a), NDArray_DEVICE(a)); + size_t offset = i * rows * sizeof(char); + memcpy(NDArray_FDATA(col_vector), a_data + offset, rows * sizeof(float)); + NDArray *mean = NDArray_CreateFromFloatScalar(NDArray_Sum_Float(col_vector) / NDArray_NUMELEMENTS(col_vector)); + NDArray *subtracted = NDArray_Subtract_Float(col_vector, mean); + efree(col_vector); + efree(mean); + norm_vectors[i] = subtracted; + } + efree(a_data); + NDArray *norm_a = NDArray_Reshape(NDArray_ConcatenateFlat(norm_vectors, cols), NDArray_SHAPE(a), NDArray_NDIM(a)); + for (int i = 0; i < cols; i++) + { + efree(norm_vectors[i]); + } + efree(col_shape); + efree(norm_vectors); + NDArray *norm_a_T = NDArray_Transpose(norm_a, NULL); + NDArray *multiplied = NDArray_Dot(norm_a, norm_a_T); + efree(norm_a); + efree(norm_a_T); + NDArray *rtn = NDArray_Divide_Float(multiplied, NDArray_CreateFromFloatScalar((float)rows - 1)); + efree(multiplied); + return rtn; } \ No newline at end of file diff --git a/src/ndmath/statistics.h b/src/ndmath/statistics.h index bb4c643..96f51c2 100644 --- a/src/ndmath/statistics.h +++ b/src/ndmath/statistics.h @@ -7,5 +7,6 @@ NDArray* NDArray_Quantile(NDArray *target, NDArray *q); NDArray* NDArray_Std(NDArray *a); NDArray* NDArray_Variance(NDArray *a); NDArray* NDArray_Average(NDArray *a, NDArray *weights); +NDArray* NDArray_cov(NDArray *matrix); #endif //NUMPOWER_STATISTICS_H diff --git a/tests/math/048-ndarray-cov.phpt b/tests/math/048-ndarray-cov.phpt new file mode 100644 index 0000000..10d8275 --- /dev/null +++ b/tests/math/048-ndarray-cov.phpt @@ -0,0 +1,101 @@ +--TEST-- +NDArray::cov +--FILE-- +toArray()); +$b = \NDArray::array([[1, 2, 3, 4], [5, 4, 3, 2]]); +print_r(\NDArray::cov($b)->toArray()); +$c = \NDArray::array([[1, 2, 3, 4], [5, 6, 7, 8]]); +print_r(\NDArray::cov($c)->toArray()); +$d = \NDArray::array([[1, 2, 3, 4], [1, 2, 3, 4]]); +print_r(\NDArray::cov($d)->toArray()); +$e = \NDArray::array([[1, 2, 3, 4]]); +print_r(\NDArray::cov($e)->toArray()); +$f = \NDArray::array([[0, 0, 0, 0], [0, 0, 0, 0]]); +print_r(\NDArray::cov($f)->toArray()); +?> +--EXPECT-- +Array +( + [0] => Array + ( + [0] => 7 + [1] => 2 + ) + + [1] => Array + ( + [0] => 2 + [1] => 1 + ) + +) +Array +( + [0] => Array + ( + [0] => 1.6666666269302 + [1] => -1.6666666269302 + ) + + [1] => Array + ( + [0] => -1.6666666269302 + [1] => 1.6666666269302 + ) + +) +Array +( + [0] => Array + ( + [0] => 1.6666666269302 + [1] => 1.6666666269302 + ) + + [1] => Array + ( + [0] => 1.6666666269302 + [1] => 1.6666666269302 + ) + +) +Array +( + [0] => Array + ( + [0] => 1.6666666269302 + [1] => 1.6666666269302 + ) + + [1] => Array + ( + [0] => 1.6666666269302 + [1] => 1.6666666269302 + ) + +) +Array +( + [0] => Array + ( + [0] => 1.6666666269302 + ) + +) +Array +( + [0] => Array + ( + [0] => 0 + [1] => 0 + ) + + [1] => Array + ( + [0] => 0 + [1] => 0 + ) + +) \ No newline at end of file From b49b497a336be01fd740961b4815d2a4e205ff20 Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Thu, 26 Sep 2024 13:44:11 -0600 Subject: [PATCH 02/10] Combined transpose and dot product to decrease pointer allocations Combined transpose and matrix multiplication in Covariance function. --- src/ndmath/statistics.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index b232ce0..7ec0037 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -204,10 +204,8 @@ NDArray *NDArray_cov(NDArray *a) } efree(col_shape); efree(norm_vectors); - NDArray *norm_a_T = NDArray_Transpose(norm_a, NULL); - NDArray *multiplied = NDArray_Dot(norm_a, norm_a_T); + NDArray *multiplied = NDArray_Dot(norm_a, NDArray_Transpose(norm_a, NULL)); efree(norm_a); - efree(norm_a_T); NDArray *rtn = NDArray_Divide_Float(multiplied, NDArray_CreateFromFloatScalar((float)rows - 1)); efree(multiplied); return rtn; From 00242921c406671abda57a330d842fafc4c86cd6 Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Thu, 26 Sep 2024 13:48:30 -0600 Subject: [PATCH 03/10] Fixed naming for param in cov Fixed naming of NDArray param for covariance function --- src/ndmath/statistics.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ndmath/statistics.h b/src/ndmath/statistics.h index 96f51c2..b192f9c 100644 --- a/src/ndmath/statistics.h +++ b/src/ndmath/statistics.h @@ -7,6 +7,6 @@ NDArray* NDArray_Quantile(NDArray *target, NDArray *q); NDArray* NDArray_Std(NDArray *a); NDArray* NDArray_Variance(NDArray *a); NDArray* NDArray_Average(NDArray *a, NDArray *weights); -NDArray* NDArray_cov(NDArray *matrix); +NDArray* NDArray_cov(NDArray *a); #endif //NUMPOWER_STATISTICS_H From 18fe72b76d1fb9320bdc428ea70a5ece27d132e3 Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Thu, 26 Sep 2024 15:44:10 -0600 Subject: [PATCH 04/10] Added Boolean for matrix param in row-major order - Cleaned up unused variables for covariance exposure function in numpower.c + added argument for inputting matrix with the rows defined as cols --- numpower.c | 18 +++++++----------- src/ndmath/statistics.c | 6 +++++- src/ndmath/statistics.h | 2 +- tests/math/048-ndarray-cov.phpt | 26 ++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/numpower.c b/numpower.c index c64b24d..db93b55 100644 --- a/numpower.c +++ b/numpower.c @@ -2902,31 +2902,27 @@ PHP_METHOD(NDArray, variance) { */ ZEND_BEGIN_ARG_INFO_EX(arginfo_ndarray_cov, 0, 0, 1) ZEND_ARG_INFO(0, array) -ZEND_ARG_INFO(0, axis) +ZEND_ARG_INFO(0, rowvar) ZEND_END_ARG_INFO() PHP_METHOD(NDArray, cov) { NDArray *rtn = NULL; zval *array; - long axis; - int i_axis; - ZEND_PARSE_PARAMETERS_START(1, 1) + bool rowvar = true; + ZEND_PARSE_PARAMETERS_START(1, 2) Z_PARAM_ZVAL(array) + Z_PARAM_OPTIONAL + Z_PARAM_BOOL(rowvar) ZEND_PARSE_PARAMETERS_END(); - i_axis = (int)axis; NDArray *nda = ZVAL_TO_NDARRAY(array); if (nda == NULL) { return; } if (NDArray_DEVICE(nda) == NDARRAY_DEVICE_CPU) { - rtn = NDArray_cov(nda); + rtn = NDArray_cov(nda, rowvar); } else { #ifdef HAVE_CUBLAS - if (ZEND_NUM_ARGS() == 1) { - rtn = NDArray_cov(nda); - } else { - rtn = single_reduce(nda, &i_axis, NDArray_Mean_Float); - } + rtn = NDArray_cov(nda, rowvar); #else zend_throw_error(NULL, "GPU operations unavailable. CUBLAS not detected."); #endif diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index 7ec0037..397b2ca 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -161,8 +161,12 @@ NDArray_Average(NDArray *a, NDArray *weights) { * @param a * @return */ -NDArray *NDArray_cov(NDArray *a) +NDArray *NDArray_cov(NDArray *a, bool rowvar) { + if (!rowvar) { + a = NDArray_Transpose(a, NULL); + } + if (a == NULL || NDArray_NUMELEMENTS(a) == 0) { zend_throw_error(NULL, "Input cannot be null or empty."); diff --git a/src/ndmath/statistics.h b/src/ndmath/statistics.h index b192f9c..d9c645a 100644 --- a/src/ndmath/statistics.h +++ b/src/ndmath/statistics.h @@ -7,6 +7,6 @@ NDArray* NDArray_Quantile(NDArray *target, NDArray *q); NDArray* NDArray_Std(NDArray *a); NDArray* NDArray_Variance(NDArray *a); NDArray* NDArray_Average(NDArray *a, NDArray *weights); -NDArray* NDArray_cov(NDArray *a); +NDArray* NDArray_cov(NDArray *a, bool rowvar); #endif //NUMPOWER_STATISTICS_H diff --git a/tests/math/048-ndarray-cov.phpt b/tests/math/048-ndarray-cov.phpt index 10d8275..0c501f5 100644 --- a/tests/math/048-ndarray-cov.phpt +++ b/tests/math/048-ndarray-cov.phpt @@ -14,6 +14,8 @@ $e = \NDArray::array([[1, 2, 3, 4]]); print_r(\NDArray::cov($e)->toArray()); $f = \NDArray::array([[0, 0, 0, 0], [0, 0, 0, 0]]); print_r(\NDArray::cov($f)->toArray()); +$g = \NDArray::array([[3, 7, 8], [2, 4, 3]]); +print_r(\NDArray::cov($g, False)->toArray()); ?> --EXPECT-- Array @@ -98,4 +100,28 @@ Array [1] => 0 ) +) +Array +( + [0] => Array + ( + [0] => 0.5 + [1] => 1.5 + [2] => 2.5 + ) + + [1] => Array + ( + [0] => 1.5 + [1] => 4.5 + [2] => 7.5 + ) + + [2] => Array + ( + [0] => 2.5 + [1] => 7.5 + [2] => 12.5 + ) + ) \ No newline at end of file From 8070fcb26ae2b10e4fe8b1273486d13bf1640057 Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Fri, 27 Sep 2024 22:31:43 -0600 Subject: [PATCH 05/10] Update cov function to use slice method for improved data handling + Refactored the NDArray_cov function in statistics.c to utilize the slice method for data extraction. + Replaced manual data copying with the slice method to improve code readability and maintainability. + Verified that the function maintains its original functionality and correctness after the refactor. --- src/ndmath/statistics.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index 397b2ca..7109da8 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -189,17 +189,33 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) col_shape[1] = 1; NDArray **norm_vectors = emalloc(sizeof(NDArray *) * cols); + + int *indices_shape = emalloc(sizeof(int) * 2); + indices_shape[0] = 2; + indices_shape[1] = 1; + + NDArray** indices_axis = emalloc(sizeof(NDArray*) * 2); + indices_axis[0] = NDArray_Zeros(indices_shape, 1, NDArray_TYPE(a), NDArray_DEVICE(a)); + indices_axis[1] = NDArray_Zeros(indices_shape, 1, NDArray_TYPE(a), NDArray_DEVICE(a)); + + NDArray_FDATA(indices_axis[1])[0] = 0; + NDArray_FDATA(indices_axis[1])[1] = rows; + for (int i = 0; i < cols; i++) { - NDArray *col_vector = NDArray_Zeros(col_shape, 2, NDArray_TYPE(a), NDArray_DEVICE(a)); - size_t offset = i * rows * sizeof(char); - memcpy(NDArray_FDATA(col_vector), a_data + offset, rows * sizeof(float)); + NDArray_FDATA(indices_axis[0])[0] = i; + NDArray_FDATA(indices_axis[0])[1] = i + 1; + NDArray *col_vector = NDArray_Slice(a, indices_axis, 2); NDArray *mean = NDArray_CreateFromFloatScalar(NDArray_Sum_Float(col_vector) / NDArray_NUMELEMENTS(col_vector)); NDArray *subtracted = NDArray_Subtract_Float(col_vector, mean); efree(col_vector); efree(mean); norm_vectors[i] = subtracted; } + efree(indices_shape); + efree(indices_axis[0]); + efree(indices_axis[1]); + efree(indices_axis); efree(a_data); NDArray *norm_a = NDArray_Reshape(NDArray_ConcatenateFlat(norm_vectors, cols), NDArray_SHAPE(a), NDArray_NDIM(a)); for (int i = 0; i < cols; i++) From 19f3b210cd2c41c59ba2102d22b7f51d5a79bc19 Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Wed, 2 Oct 2024 18:50:47 -0600 Subject: [PATCH 06/10] Removed unused a_data array in covariance function - Removed the unused a_data array - Removed the unused col_shape array - Ensured that the function maintains its original functionality and correctness. - Verified that all tests pass successfully, confirming the correctness of the changes. --- src/ndmath/statistics.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index 7109da8..3811e2c 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -181,13 +181,6 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) int cols = NDArray_SHAPE(a)[0]; int rows = NDArray_SHAPE(a)[1]; - float *a_data = (float *)emalloc(sizeof(float) * NDArray_NUMELEMENTS(a)); - memcpy(a_data, NDArray_FDATA(a), rows * cols * sizeof(float)); - - int *col_shape = emalloc(sizeof(int) * 2); - col_shape[0] = rows; - col_shape[1] = 1; - NDArray **norm_vectors = emalloc(sizeof(NDArray *) * cols); int *indices_shape = emalloc(sizeof(int) * 2); @@ -216,13 +209,11 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) efree(indices_axis[0]); efree(indices_axis[1]); efree(indices_axis); - efree(a_data); NDArray *norm_a = NDArray_Reshape(NDArray_ConcatenateFlat(norm_vectors, cols), NDArray_SHAPE(a), NDArray_NDIM(a)); for (int i = 0; i < cols; i++) { efree(norm_vectors[i]); } - efree(col_shape); efree(norm_vectors); NDArray *multiplied = NDArray_Dot(norm_a, NDArray_Transpose(norm_a, NULL)); efree(norm_a); From caa3cbd2df8823bc9a53614517cf16c41833087a Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Wed, 2 Oct 2024 18:57:47 -0600 Subject: [PATCH 07/10] Refactored norm to centered for readability + Changed name of norm_a to centered_a + Changed name of norm_vectors to centered_vectors - Ensured that the function maintains its original functionality and correctness. - Verified that all tests pass successfully, confirming the correctness of the changes. --- src/ndmath/statistics.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index 3811e2c..d410c36 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -181,7 +181,7 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) int cols = NDArray_SHAPE(a)[0]; int rows = NDArray_SHAPE(a)[1]; - NDArray **norm_vectors = emalloc(sizeof(NDArray *) * cols); + NDArray **centered_vectors = emalloc(sizeof(NDArray *) * cols); int *indices_shape = emalloc(sizeof(int) * 2); indices_shape[0] = 2; @@ -203,20 +203,20 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) NDArray *subtracted = NDArray_Subtract_Float(col_vector, mean); efree(col_vector); efree(mean); - norm_vectors[i] = subtracted; + centered_vectors[i] = subtracted; } efree(indices_shape); efree(indices_axis[0]); efree(indices_axis[1]); efree(indices_axis); - NDArray *norm_a = NDArray_Reshape(NDArray_ConcatenateFlat(norm_vectors, cols), NDArray_SHAPE(a), NDArray_NDIM(a)); + NDArray *centered_a = NDArray_Reshape(NDArray_ConcatenateFlat(centered_vectors, cols), NDArray_SHAPE(a), NDArray_NDIM(a)); for (int i = 0; i < cols; i++) { - efree(norm_vectors[i]); + efree(centered_vectors[i]); } - efree(norm_vectors); - NDArray *multiplied = NDArray_Dot(norm_a, NDArray_Transpose(norm_a, NULL)); - efree(norm_a); + efree(centered_vectors); + NDArray *multiplied = NDArray_Dot(centered_a, NDArray_Transpose(centered_a, NULL)); + efree(centered_a); NDArray *rtn = NDArray_Divide_Float(multiplied, NDArray_CreateFromFloatScalar((float)rows - 1)); efree(multiplied); return rtn; From af8c992f4632d0304f99ebf761f47969fc7d4913 Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Wed, 2 Oct 2024 20:29:03 -0600 Subject: [PATCH 08/10] Change efree to NDArray_Free for NDArrays in covariance function + Replaced efree calls with NDArray_FREE for NDArrays in the covariance function to ensure proper memory management. - Ensured that the function maintains its original functionality and correctness. - Verified that all tests pass successfully, confirming the correctness of the changes. --- src/ndmath/statistics.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index d410c36..63921b7 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -201,8 +201,8 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) NDArray *col_vector = NDArray_Slice(a, indices_axis, 2); NDArray *mean = NDArray_CreateFromFloatScalar(NDArray_Sum_Float(col_vector) / NDArray_NUMELEMENTS(col_vector)); NDArray *subtracted = NDArray_Subtract_Float(col_vector, mean); - efree(col_vector); - efree(mean); + NDArray_FREE(col_vector); + NDArray_FREE(mean); centered_vectors[i] = subtracted; } efree(indices_shape); @@ -212,12 +212,12 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) NDArray *centered_a = NDArray_Reshape(NDArray_ConcatenateFlat(centered_vectors, cols), NDArray_SHAPE(a), NDArray_NDIM(a)); for (int i = 0; i < cols; i++) { - efree(centered_vectors[i]); + NDArray_FREE(centered_vectors[i]); } efree(centered_vectors); NDArray *multiplied = NDArray_Dot(centered_a, NDArray_Transpose(centered_a, NULL)); - efree(centered_a); + NDArray_FREE(centered_a); NDArray *rtn = NDArray_Divide_Float(multiplied, NDArray_CreateFromFloatScalar((float)rows - 1)); - efree(multiplied); + NDArray_FREE(multiplied); return rtn; } \ No newline at end of file From 018a7f9db9ad9f17af12ab5a9d4432f0c4afe6be Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Wed, 2 Oct 2024 20:40:49 -0600 Subject: [PATCH 09/10] Refactor to combine mean calculation and subtraction into single centered variable + Refactored the covariance function in statistics.c to combine the mean calculation and subtraction into a single centered variable. + Simplified code by removing the intermediate mean and subtracted variables - Ensured that the function maintains its original functionality and correctness. - Verified that all tests pass successfully, confirming the correctness of the changes. --- src/ndmath/statistics.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index 63921b7..272abb9 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -199,11 +199,9 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) NDArray_FDATA(indices_axis[0])[0] = i; NDArray_FDATA(indices_axis[0])[1] = i + 1; NDArray *col_vector = NDArray_Slice(a, indices_axis, 2); - NDArray *mean = NDArray_CreateFromFloatScalar(NDArray_Sum_Float(col_vector) / NDArray_NUMELEMENTS(col_vector)); - NDArray *subtracted = NDArray_Subtract_Float(col_vector, mean); + NDArray *centered = NDArray_Subtract_Float(col_vector, NDArray_CreateFromFloatScalar(NDArray_Sum_Float(col_vector) / NDArray_NUMELEMENTS(col_vector))); NDArray_FREE(col_vector); - NDArray_FREE(mean); - centered_vectors[i] = subtracted; + centered_vectors[i] = centered; } efree(indices_shape); efree(indices_axis[0]); From 8225a7d64aa5e20d0e541f296a21c6fd764f1c4b Mon Sep 17 00:00:00 2001 From: Jimmy Young Date: Wed, 2 Oct 2024 20:46:59 -0600 Subject: [PATCH 10/10] Refactor to move malloc of centered_vectors to reduce heap allocation time + Refactored the covariance function in statistics.c to move the allocation of the centered_vectors pointer array closer to its usage. + Reduced the time centered_vectors exists in the heap, improving memory management - Ensured that the function maintains its original functionality and correctness. - Verified that all tests pass successfully, confirming the correctness of the changes. --- src/ndmath/statistics.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ndmath/statistics.c b/src/ndmath/statistics.c index 272abb9..a4ab217 100644 --- a/src/ndmath/statistics.c +++ b/src/ndmath/statistics.c @@ -181,8 +181,6 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) int cols = NDArray_SHAPE(a)[0]; int rows = NDArray_SHAPE(a)[1]; - NDArray **centered_vectors = emalloc(sizeof(NDArray *) * cols); - int *indices_shape = emalloc(sizeof(int) * 2); indices_shape[0] = 2; indices_shape[1] = 1; @@ -194,6 +192,7 @@ NDArray *NDArray_cov(NDArray *a, bool rowvar) NDArray_FDATA(indices_axis[1])[0] = 0; NDArray_FDATA(indices_axis[1])[1] = rows; + NDArray **centered_vectors = emalloc(sizeof(NDArray *) * cols); for (int i = 0; i < cols; i++) { NDArray_FDATA(indices_axis[0])[0] = i;