diff --git a/chainladder/core/dunders.py b/chainladder/core/dunders.py index e8799703..78226045 100644 --- a/chainladder/core/dunders.py +++ b/chainladder/core/dunders.py @@ -45,7 +45,7 @@ def _validate_arithmetic(self, other: Any) -> tuple: if isinstance(other, np.ndarray) and self.array_backend != 'numpy': obj = self.copy() other = obj.get_array_module().array(other) - elif isinstance(other, sp) and self.array_backend != 'sparse': + elif isinstance(other, sp.COO) and self.array_backend != 'sparse': obj = self.set_backend('sparse') else: obj = self.copy() diff --git a/chainladder/core/slice.py b/chainladder/core/slice.py index 55c29a82..4c5b98e8 100644 --- a/chainladder/core/slice.py +++ b/chainladder/core/slice.py @@ -103,7 +103,7 @@ def _sparse_setitem(self, key, values): (self.obj.values.coords, np.array(key)[:, None]), 1) self.obj.values.data = np.concatenate( (self.obj.values.data, np.array([values])), 0) - self.obj.values = self.obj.get_array_module()( + self.obj.values = self.obj.get_array_module().COO( self.obj.values.coords, self.obj.values.data, prune=True, has_duplicates=False, shape=self.obj.shape, fill_value=self.obj.values.fill_value) @@ -238,8 +238,8 @@ def __setitem__(self, key, value): value.values.coords[1] = i coords = np.concatenate((before.coords, value.values.coords), axis=1) data = np.concatenate((before.data, value.values.data)) - self.values = xp( - coords, data, shape=self.shape, prune=True, fill_value=xp.nan + self.values = xp.COO( + coords, data, shape=self.shape, prune=True, fill_value=xp.COO.nan ) else: if isinstance(value, TriangleSlicer): diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py index dd80c21e..a6a33b52 100644 --- a/chainladder/core/triangle.py +++ b/chainladder/core/triangle.py @@ -524,7 +524,7 @@ def __init__( # Construct Sparse multidimensional array. self.values: COO = num_to_nan( - sp( + sp.COO( coords, amts, prune=True, diff --git a/chainladder/utils/sparse.py b/chainladder/utils/sparse.py index 50533d98..2de836b8 100644 --- a/chainladder/utils/sparse.py +++ b/chainladder/utils/sparse.py @@ -2,23 +2,13 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. import numpy as np -import sparse -from sparse import COO as sp +import sparse as sp +from sparse import COO as COO from sparse import elemwise -import pandas as pd -import copy sp.isnan = np.isnan -sp.newaxis = np.newaxis -sp.nan = np.array([1.0, np.nan])[-1] -sp.testing = np.testing -sp.nansum = sparse.nansum -sp.nanmin = sparse.nanmin -sp.nanmax = sparse.nanmax -sp.concatenate = sparse.concatenate -sp.diagonal = sparse.diagonal -sp.zeros = sparse.zeros -sp.testing.assert_array_equal = np.testing.assert_equal +COO.nan = np.array([1.0, np.nan])[-1] +setattr(sp, 'testing', np.testing) sp.sqrt = np.sqrt sp.log = np.log sp.exp = np.exp @@ -31,41 +21,41 @@ def nan_to_num(a): if hasattr(a, "fill_value"): a = a.copy() a.data[np.isnan(a.data)] = 0.0 - return sp(coords=a.coords, data=a.data, fill_value=0.0, shape=a.shape) + return COO(coords=a.coords, data=a.data, fill_value=0.0, shape=a.shape) def ones(*args, **kwargs): - return sp(np.ones(*args, **kwargs), fill_value=sp.nan) + return COO(np.ones(*args, **kwargs), fill_value=COO.nan) def nansum(a, axis=None, keepdims=None, *args, **kwargs): - return sp(data=a.data, coords=a.coords, fill_value=0.0, shape=a.shape).sum( + return COO(data=a.data, coords=a.coords, fill_value=0.0, shape=a.shape).sum( axis=axis, keepdims=keepdims, *args, **kwargs ) -sp.nansum = nansum -def nanmean(a, axis=None, keepdims=None, *args, **kwargs): - n = sp.nansum(a, axis=axis, keepdims=keepdims) - d = sp.nansum(sp.nan_to_num(a) != 0, axis=axis, keepdims=keepdims).astype(n.dtype) - n = sp(data=n.data, coords=n.coords, fill_value=np.nan, shape=n.shape) - d = sp(data=d.data, coords=d.coords, fill_value=np.nan, shape=d.shape) + +def nanmean(a, axis=None, keepdims=None): + n = nansum(a, axis=axis, keepdims=keepdims) + d = nansum(nan_to_num(a) != 0, axis=axis, keepdims=keepdims).astype(n.dtype) + n = COO(data=n.data, coords=n.coords, fill_value=np.nan, shape=n.shape) + d = COO(data=d.data, coords=d.coords, fill_value=np.nan, shape=d.shape) out = n / d - return sp(data=out.data, coords=out.coords, fill_value=0, shape=out.shape) + return COO(data=out.data, coords=out.coords, fill_value=0, shape=out.shape) def array(a, *args, **kwargs): if kwargs.get("fill_value", None) is not None: fill_value = kwargs.pop("fill_value") else: - fill_value = sp.nan - if type(a) == sp: - return sp(a, *args, **kwargs, fill_value=fill_value) + fill_value = COO.nan + if type(a) == sp.COO: + return COO(a, *args, **kwargs, fill_value=fill_value) else: - return sp(np.array(a, *args, **kwargs), fill_value=fill_value) + return COO(np.array(a, *args, **kwargs), fill_value=fill_value) def arange(*args, **kwargs): - return sparse.COO.from_numpy(np.arange(*args, **kwargs)) + return COO.from_numpy(np.arange(*args, **kwargs)) def where(*args, **kwargs): @@ -76,12 +66,12 @@ def cumprod(a, axis=None, dtype=None, out=None): return array(np.cumprod(a.todense(), axis=axis, dtype=dtype, out=out)) -def floor(x, *args, **kwargs): +def floor(x): x.data = np.floor(x.data) return x - +sp.nansum = nansum sp.minimum = np.minimum sp.maximum = np.maximum sp.floor = floor @@ -91,4 +81,6 @@ def floor(x, *args, **kwargs): sp.nan_to_num = nan_to_num sp.ones = ones sp.cumprod = cumprod +COO.cumprod = cumprod sp.nanmean = nanmean +sp.sum = COO.sum diff --git a/chainladder/utils/tests/test_sparse.py b/chainladder/utils/tests/test_sparse.py new file mode 100644 index 00000000..c8a2b211 --- /dev/null +++ b/chainladder/utils/tests/test_sparse.py @@ -0,0 +1,110 @@ +import numpy as np + +from chainladder.utils.sparse import ( + array, + floor, + COO, + where +) + + +def test_array_from_list_default_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when no fill value is provided. + Checks whether the default nan is filled. + + Returns + ------- + None + + """ + result: COO = array([1.0, 2.0, 3.0]) + assert isinstance(result, COO) + assert np.isnan(result.fill_value) + + +def test_array_from_list_explicit_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when a fill value of 0 is provided. + Checks whether the 0 is filled. + + Returns + ------- + + """ + result: COO = array([1, 2, 3], fill_value=0) + assert isinstance(result, COO) + assert result.fill_value == 0 + + +def test_array_from_coo_default_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when initializing from a sparse array with a default fill value. + + Returns + ------- + None + + """ + coo = COO.from_numpy(np.array([1.0, 2.0, 3.0])) + result: COO = array(coo) + assert isinstance(result, COO) + assert np.isnan(result.fill_value) + + +def test_array_from_coo_explicit_fill_value() -> None: + """ + Tests chainladder.utils.sparse.array() when initializing from a sparse array with an explicit fill value. + + Returns + ------- + None + + """ + coo = COO.from_numpy(np.array([1, 2, 3])) + result: COO = array(coo, fill_value=0) + assert isinstance(result, COO) + assert result.fill_value == 0 + + +def test_where_selects_from_two_arrays() -> None: + """ + Tests element-wise where across sparse arrays. Calls np.where on each element triplet + (cond[i], a[i], b[i]) - returning a[i] where the condition is True and b[i] where it's False. + + Returns + ------- + None + """ + a: COO = array([1.0, 2.0, 3.0]) + b: COO = array([10.0, 20.0, 30.0]) + cond: COO = array([True, False, True]) + result: COO = where(cond, a, b) + assert isinstance(result, COO) + np.testing.assert_array_equal(result.todense(), [1.0, 20.0, 3.0]) + + +def test_floor_rounds_down() -> None: + """ + Checks floor function rounding down with positive and negative floats. + + Returns + ------- + None + """ + a: COO = array([1.2, 2.7, -0.3]) + result: COO = floor(a) + np.testing.assert_array_equal(result.todense(), [1.0, 2.0, -1.0]) + + +def test_floor_mutates_in_place() -> None: + """ + Checks in-place mutation of floor function. + + Returns + ------- + None + """ + a = array([1.2, 2.7, -0.3]) + result: COO = floor(a) + assert result is a diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index 37c381f1..e044f9ca 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -730,14 +730,14 @@ def num_to_value( arr.coords = arr.coords[:, arr.data != 0] arr.data = arr.data[arr.data != 0] - arr: COO = sp( + arr: COO = sp.COO( coords=arr.coords, data=arr.data, - fill_value=sp.nan, # noqa + fill_value=sp.COO.nan, # noqa shape=arr.shape ) else: - arr: COO = sp( + arr: COO = sp.COO( num_to_nan(np.nan_to_num(arr.todense())), fill_value=value ) diff --git a/chainladder/utils/weighted_regression.py b/chainladder/utils/weighted_regression.py index aec67d22..12a5ef7e 100644 --- a/chainladder/utils/weighted_regression.py +++ b/chainladder/utils/weighted_regression.py @@ -53,7 +53,7 @@ def _fit_OLS(self): y[w == 0] = xp.nan else: w2 = w.copy() - w2 = sp(data=w2.data, coords=w2.coords, fill_value=sp.nan, shape=w2.shape) + w2 = sp.COO(data=w2.data, coords=w2.coords, fill_value=sp.nan, shape=w2.shape) x, y = x * w2, y * w2 with warnings.catch_warnings():