Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 23 additions & 16 deletions src/scanpy/tools/_score_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,30 @@ def _sparse_nanmean(x: CSBase, /, axis: Literal[0, 1]) -> NDArray[np.float64]:
if not isinstance(x, CSBase):
msg = "X must be a compressed sparse matrix"
raise TypeError(msg)
if axis not in (0, 1):
msg = "axis must be 0 or 1"
raise ValueError(msg)
Comment on lines +37 to +39

# count the number of nan elements per row/column (dep. on axis)
z = x.copy()
z.data = np.isnan(z.data)
z.eliminate_zeros()
n_elements = z.shape[axis] - z.sum(axis)

# set the nans to 0, so that a normal .sum() works
y = x.copy()
y.data[np.isnan(y.data)] = 0
y.eliminate_zeros()

# the average
s = y.sum(axis, dtype="float64") # float64 for score_genes function compatibility)
m = s / n_elements

return m
# Work in the compressed format aligned with the reduction axis and aggregate
# directly from index pointers to avoid matrix copies and eliminate_zeros().
mat = x.tocsc(copy=False) if axis == 0 else x.tocsr(copy=False)
segment_lengths = np.diff(mat.indptr)
out_size = mat.shape[1] if axis == 0 else mat.shape[0]
full_length = mat.shape[0] if axis == 0 else mat.shape[1]

segment_ids = np.repeat(np.arange(out_size), segment_lengths)
isnan = np.isnan(mat.data)

sums = np.bincount(
segment_ids[~isnan],
weights=mat.data[~isnan],
minlength=out_size,
).astype(np.float64, copy=False)
nan_counts = np.bincount(segment_ids[isnan], minlength=out_size)
Comment on lines +48 to +56
counts = full_length - nan_counts

with np.errstate(invalid="ignore", divide="ignore"):
return sums / counts
Comment on lines +59 to +60


@_doc_params(rng=doc_rng)
Expand Down
6 changes: 5 additions & 1 deletion tests/test_score_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def test_add_score():


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("matrix_format", ["csr", "csc"])
@pytest.mark.parametrize(
"mk_arr",
[
Expand All @@ -130,14 +131,17 @@ def test_add_score():
],
)
def test_sparse_nanmean(
mk_arr: Callable[[], CSBase | np.ndarray], axis: Literal[0, 1]
mk_arr: Callable[[], CSBase | np.ndarray],
axis: Literal[0, 1],
matrix_format: Literal["csr", "csc"],
) -> None:
"""Check that _sparse_nanmean() is equivalent to np.nanmean()."""
from scanpy.tools._score_genes import _sparse_nanmean

arr_or_mat = mk_arr()
arr = conv.to_dense(arr_or_mat)
mat = sparse.csr_matrix(arr) if not isinstance(arr, CSBase) else arr # noqa: TID251
mat = mat.asformat(matrix_format)
np.testing.assert_allclose(
np.nanmean(arr, axis), np.array(_sparse_nanmean(mat, axis)).flatten()
)
Expand Down
Loading