From f65af55a3f3b708ad17a16ea5b80fc8b709555a3 Mon Sep 17 00:00:00 2001 From: Gene Dan Date: Wed, 6 May 2026 17:10:39 -0500 Subject: [PATCH 1/5] REFACTOR: Reorganize type hierarchy of sparse.py. Move array-level functions to sparse.COO, and keep module-level functions on sp. Replace xp() with xp.COO() wherever xp was previously callable and used as a constructor to create sparse arrays. --- chainladder/core/dunders.py | 2 +- chainladder/core/slice.py | 4 +- chainladder/core/triangle.py | 2 +- chainladder/utils/sparse.py | 51 +++++++++++--------------- chainladder/utils/utility_functions.py | 4 +- 5 files changed, 27 insertions(+), 36 deletions(-) diff --git a/chainladder/core/dunders.py b/chainladder/core/dunders.py index e8799703..78226045 100644 --- a/chainladder/core/dunders.py +++ b/chainladder/core/dunders.py @@ -45,7 +45,7 @@ def _validate_arithmetic(self, other: Any) -> tuple: if isinstance(other, np.ndarray) and self.array_backend != 'numpy': obj = self.copy() other = obj.get_array_module().array(other) - elif isinstance(other, sp) and self.array_backend != 'sparse': + elif isinstance(other, sp.COO) and self.array_backend != 'sparse': obj = self.set_backend('sparse') else: obj = self.copy() diff --git a/chainladder/core/slice.py b/chainladder/core/slice.py index 55c29a82..d7394a16 100644 --- a/chainladder/core/slice.py +++ b/chainladder/core/slice.py @@ -103,7 +103,7 @@ def _sparse_setitem(self, key, values): (self.obj.values.coords, np.array(key)[:, None]), 1) self.obj.values.data = np.concatenate( (self.obj.values.data, np.array([values])), 0) - self.obj.values = self.obj.get_array_module()( + self.obj.values = self.obj.get_array_module().COO( self.obj.values.coords, self.obj.values.data, prune=True, has_duplicates=False, shape=self.obj.shape, fill_value=self.obj.values.fill_value) @@ -238,7 +238,7 @@ def __setitem__(self, key, value): value.values.coords[1] = i coords = np.concatenate((before.coords, value.values.coords), axis=1) data = 
np.concatenate((before.data, value.values.data)) - self.values = xp( + self.values = xp.COO( coords, data, shape=self.shape, prune=True, fill_value=xp.nan ) else: diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index dd80c21e..a6a33b52 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -524,7 +524,7 @@ def __init__( # Construct Sparse multidimensional array. self.values: COO = num_to_nan( - sp( + sp.COO( coords, amts, prune=True, diff --git a/chainladder/utils/sparse.py b/chainladder/utils/sparse.py index 50533d98..b47b555f 100644 --- a/chainladder/utils/sparse.py +++ b/chainladder/utils/sparse.py @@ -2,23 +2,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. import numpy as np -import sparse -from sparse import COO as sp +import sparse as sp +from sparse import COO as COO from sparse import elemwise -import pandas as pd -import copy sp.isnan = np.isnan -sp.newaxis = np.newaxis -sp.nan = np.array([1.0, np.nan])[-1] -sp.testing = np.testing -sp.nansum = sparse.nansum -sp.nanmin = sparse.nanmin -sp.nanmax = sparse.nanmax -sp.concatenate = sparse.concatenate -sp.diagonal = sparse.diagonal -sp.zeros = sparse.zeros -sp.testing.assert_array_equal = np.testing.assert_equal +COO.nan = np.array([1.0, np.nan])[-1] +setattr(sp, 'testing', np.testing) sp.sqrt = np.sqrt sp.log = np.log sp.exp = np.exp @@ -31,27 +21,27 @@ def nan_to_num(a): if hasattr(a, "fill_value"): a = a.copy() a.data[np.isnan(a.data)] = 0.0 - return sp(coords=a.coords, data=a.data, fill_value=0.0, shape=a.shape) + return COO(coords=a.coords, data=a.data, fill_value=0.0, shape=a.shape) def ones(*args, **kwargs): - return sp(np.ones(*args, **kwargs), fill_value=sp.nan) + return COO(np.ones(*args, **kwargs), fill_value=sp.nan) def nansum(a, axis=None, keepdims=None, *args, **kwargs): - return sp(data=a.data, coords=a.coords, fill_value=0.0, shape=a.shape).sum( + return 
COO(data=a.data, coords=a.coords, fill_value=0.0, shape=a.shape).sum( axis=axis, keepdims=keepdims, *args, **kwargs ) -sp.nansum = nansum -def nanmean(a, axis=None, keepdims=None, *args, **kwargs): - n = sp.nansum(a, axis=axis, keepdims=keepdims) - d = sp.nansum(sp.nan_to_num(a) != 0, axis=axis, keepdims=keepdims).astype(n.dtype) - n = sp(data=n.data, coords=n.coords, fill_value=np.nan, shape=n.shape) - d = sp(data=d.data, coords=d.coords, fill_value=np.nan, shape=d.shape) + +def nanmean(a, axis=None, keepdims=None): + n = nansum(a, axis=axis, keepdims=keepdims) + d = nansum(nan_to_num(a) != 0, axis=axis, keepdims=keepdims).astype(n.dtype) + n = COO(data=n.data, coords=n.coords, fill_value=np.nan, shape=n.shape) + d = COO(data=d.data, coords=d.coords, fill_value=np.nan, shape=d.shape) out = n / d - return sp(data=out.data, coords=out.coords, fill_value=0, shape=out.shape) + return COO(data=out.data, coords=out.coords, fill_value=0, shape=out.shape) def array(a, *args, **kwargs): if kwargs.get("fill_value", None) is not None: @@ -59,13 +49,13 @@ def array(a, *args, **kwargs): else: fill_value = sp.nan if type(a) == sp: - return sp(a, *args, **kwargs, fill_value=fill_value) + return COO(a, *args, **kwargs, fill_value=fill_value) else: - return sp(np.array(a, *args, **kwargs), fill_value=fill_value) + return COO(np.array(a, *args, **kwargs), fill_value=fill_value) def arange(*args, **kwargs): - return sparse.COO.from_numpy(np.arange(*args, **kwargs)) + return COO.from_numpy(np.arange(*args, **kwargs)) def where(*args, **kwargs): @@ -76,12 +66,12 @@ def cumprod(a, axis=None, dtype=None, out=None): return array(np.cumprod(a.todense(), axis=axis, dtype=dtype, out=out)) -def floor(x, *args, **kwargs): +def floor(x): x.data = np.floor(x.data) return x - +sp.nansum = nansum sp.minimum = np.minimum sp.maximum = np.maximum sp.floor = floor @@ -90,5 +80,6 @@ def floor(x, *args, **kwargs): sp.array = array sp.nan_to_num = nan_to_num sp.ones = ones -sp.cumprod = cumprod 
+COO.cumprod = cumprod sp.nanmean = nanmean +sp.sum = COO.sum diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index 37c381f1..be8b0970 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -730,14 +730,14 @@ def num_to_value( arr.coords = arr.coords[:, arr.data != 0] arr.data = arr.data[arr.data != 0] - arr: COO = sp( + arr: COO = sp.COO( coords=arr.coords, data=arr.data, fill_value=sp.nan, # noqa shape=arr.shape ) else: - arr: COO = sp( + arr: COO = sp.COO( num_to_nan(np.nan_to_num(arr.todense())), fill_value=value ) From a33f181e1be37eb60f782f017fc5328803c74e7f Mon Sep 17 00:00:00 2001 From: Gene Dan Date: Wed, 6 May 2026 18:21:14 -0500 Subject: [PATCH 2/5] FIX: Apply bugbot fixes. --- chainladder/core/slice.py | 2 +- chainladder/core/triangle.py | 2 +- chainladder/utils/sparse.py | 6 +++--- chainladder/utils/weighted_regression.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chainladder/core/slice.py b/chainladder/core/slice.py index d7394a16..4c5b98e8 100644 --- a/chainladder/core/slice.py +++ b/chainladder/core/slice.py @@ -239,7 +239,7 @@ def __setitem__(self, key, value): coords = np.concatenate((before.coords, value.values.coords), axis=1) data = np.concatenate((before.data, value.values.data)) self.values = xp.COO( - coords, data, shape=self.shape, prune=True, fill_value=xp.nan + coords, data, shape=self.shape, prune=True, fill_value=xp.COO.nan ) else: if isinstance(value, TriangleSlicer): diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index a6a33b52..7e2d54aa 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -1185,7 +1185,7 @@ def incr_to_cum(self, inplace=False): else: values = xp.nan_to_num(self.values[..., ::-1]) values = num_to_value(values, 1) - values = xp.cumprod(values, -1)[..., ::-1] + values = xp.COO.cumprod(values, -1)[..., ::-1] self.values = values * self.nan_triangle values 
= num_to_value(values, self.get_array_module(values).nan) else: diff --git a/chainladder/utils/sparse.py b/chainladder/utils/sparse.py index b47b555f..c90c25c8 100644 --- a/chainladder/utils/sparse.py +++ b/chainladder/utils/sparse.py @@ -25,7 +25,7 @@ def nan_to_num(a): def ones(*args, **kwargs): - return COO(np.ones(*args, **kwargs), fill_value=sp.nan) + return COO(np.ones(*args, **kwargs), fill_value=COO.nan) def nansum(a, axis=None, keepdims=None, *args, **kwargs): @@ -47,8 +47,8 @@ def array(a, *args, **kwargs): if kwargs.get("fill_value", None) is not None: fill_value = kwargs.pop("fill_value") else: - fill_value = sp.nan - if type(a) == sp: + fill_value = COO.nan + if type(a) == sp.COO: return COO(a, *args, **kwargs, fill_value=fill_value) else: return COO(np.array(a, *args, **kwargs), fill_value=fill_value) diff --git a/chainladder/utils/weighted_regression.py b/chainladder/utils/weighted_regression.py index aec67d22..12a5ef7e 100644 --- a/chainladder/utils/weighted_regression.py +++ b/chainladder/utils/weighted_regression.py @@ -53,7 +53,7 @@ def _fit_OLS(self): y[w == 0] = xp.nan else: w2 = w.copy() - w2 = sp(data=w2.data, coords=w2.coords, fill_value=sp.nan, shape=w2.shape) + w2 = sp.COO(data=w2.data, coords=w2.coords, fill_value=sp.nan, shape=w2.shape) x, y = x * w2, y * w2 with warnings.catch_warnings(): From 41bf857045c6b58e9269d234f53cc0297b66d449 Mon Sep 17 00:00:00 2001 From: Gene Dan Date: Wed, 6 May 2026 18:31:03 -0500 Subject: [PATCH 3/5] FIX: Apply bugbot fixes. 
--- chainladder/core/triangle.py | 2 +- chainladder/utils/sparse.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index 7e2d54aa..a6a33b52 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -1185,7 +1185,7 @@ def incr_to_cum(self, inplace=False): else: values = xp.nan_to_num(self.values[..., ::-1]) values = num_to_value(values, 1) - values = xp.COO.cumprod(values, -1)[..., ::-1] + values = xp.cumprod(values, -1)[..., ::-1] self.values = values * self.nan_triangle values = num_to_value(values, self.get_array_module(values).nan) else: diff --git a/chainladder/utils/sparse.py b/chainladder/utils/sparse.py index c90c25c8..2de836b8 100644 --- a/chainladder/utils/sparse.py +++ b/chainladder/utils/sparse.py @@ -80,6 +80,7 @@ def floor(x): sp.array = array sp.nan_to_num = nan_to_num sp.ones = ones +sp.cumprod = cumprod COO.cumprod = cumprod sp.nanmean = nanmean sp.sum = COO.sum From e48a28e78d5ab851c7dfcaf699749ad62d881f34 Mon Sep 17 00:00:00 2001 From: Gene Dan Date: Wed, 6 May 2026 18:32:16 -0500 Subject: [PATCH 4/5] FIX: Apply bugbot fixes. --- chainladder/utils/utility_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index be8b0970..e044f9ca 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -733,7 +733,7 @@ def num_to_value( arr: COO = sp.COO( coords=arr.coords, data=arr.data, - fill_value=sp.nan, # noqa + fill_value=sp.COO.nan, # noqa shape=arr.shape ) else: From 07157937e7c600abb2881f7cb771c72f6835663a Mon Sep 17 00:00:00 2001 From: Gene Dan Date: Wed, 6 May 2026 20:59:39 -0500 Subject: [PATCH 5/5] TEST: Add unit tests to chainladder.utils.sparse to cover missing lines. 
--- chainladder/utils/tests/test_sparse.py | 110 +++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 chainladder/utils/tests/test_sparse.py diff --git a/chainladder/utils/tests/test_sparse.py b/chainladder/utils/tests/test_sparse.py new file mode 100644 index 00000000..c8a2b211 --- /dev/null +++ b/chainladder/utils/tests/test_sparse.py @@ -0,0 +1,110 @@ +import numpy as np + +from chainladder.utils.sparse import ( + array, + floor, + COO, + where +) + + +def test_array_from_list_default_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when no fill value is provided. + Checks that the fill value defaults to nan. + + Returns + ------- + None + + """ + result: COO = array([1.0, 2.0, 3.0]) + assert isinstance(result, COO) + assert np.isnan(result.fill_value) + + +def test_array_from_list_explicit_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when a fill value of 0 is provided. + Checks that the resulting fill value is 0. + + Returns + ------- + None + """ + result: COO = array([1, 2, 3], fill_value=0) + assert isinstance(result, COO) + assert result.fill_value == 0 + + +def test_array_from_coo_default_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when initializing from a sparse array with a default fill value. + + Returns + ------- + None + + """ + coo = COO.from_numpy(np.array([1.0, 2.0, 3.0])) + result: COO = array(coo) + assert isinstance(result, COO) + assert np.isnan(result.fill_value) + + +def test_array_from_coo_explicit_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when initializing from a sparse array with an explicit fill value. + + Returns + ------- + None + + """ + coo = COO.from_numpy(np.array([1, 2, 3])) + result: COO = array(coo, fill_value=0) + assert isinstance(result, COO) + assert result.fill_value == 0 + + +def test_where_selects_from_two_arrays() -> None: + """ + Tests element-wise where across sparse arrays. 
Calls np.where on each element triplet + (cond[i], a[i], b[i]) - returning a[i] where the condition is True and b[i] where it's False. + + Returns + ------- + None + """ + a: COO = array([1.0, 2.0, 3.0]) + b: COO = array([10.0, 20.0, 30.0]) + cond: COO = array([True, False, True]) + result: COO = where(cond, a, b) + assert isinstance(result, COO) + np.testing.assert_array_equal(result.todense(), [1.0, 20.0, 3.0]) + + +def test_floor_rounds_down() -> None: + """ + Checks floor function rounding down with positive and negative floats. + + Returns + ------- + None + """ + a: COO = array([1.2, 2.7, -0.3]) + result: COO = floor(a) + np.testing.assert_array_equal(result.todense(), [1.0, 2.0, -1.0]) + + +def test_floor_mutates_in_place() -> None: + """ + Checks in-place mutation of floor function. + + Returns + ------- + None + """ + a = array([1.2, 2.7, -0.3]) + result: COO = floor(a) + assert result is a