scverse · Lawson-Darrow · Jun 1, 2026 · Jun 1, 2026
diff --git a/docs/release-notes/4141.perf.md b/docs/release-notes/4141.perf.md
@@ -0,0 +1 @@
+Speed up {func}`~scanpy.tl.score_genes` by computing the sparse `nanmean` in a single pass instead of copying the matrix twice {smaller}`L Darrow`
diff --git a/src/scanpy/tools/_score_genes.py b/src/scanpy/tools/_score_genes.py
@@ -4,8 +4,10 @@
 
 from typing import TYPE_CHECKING
 
+import numba
 import numpy as np
 import pandas as pd
+from fast_array_utils.numba import njit
 
 from .. import logging as logg
 from .._compat import CSBase
@@ -29,28 +31,65 @@
 type _GetSubset = Callable[[_StrIdx], np.ndarray | CSBase]
 
 
+@njit
+def _sparse_nanmean_within_slot(
+    data: NDArray, indptr: NDArray, n_out: int, divisor: int
+) -> NDArray[np.float64]:
+    """Reduce within each compressed slot (e.g. per row for CSR), ignoring NaNs."""
+    out = np.empty(n_out, dtype=np.float64)
+    for i in numba.prange(n_out):
+        total = np.float64(0)
+        n_nan = 0
+        for k in range(indptr[i], indptr[i + 1]):
+            v = data[k]
+            if np.isnan(v):
+                n_nan += 1
+            else:
+                total += v
+        out[i] = total / (divisor - n_nan)
+    return out
+
+
+@njit
+def _sparse_nanmean_across_slots(
+    data: NDArray, indices: NDArray, n_out: int, divisor: int
+) -> NDArray[np.float64]:
+    """Reduce across compressed slots (e.g. per column for CSR), ignoring NaNs."""
+    total = np.zeros(n_out, dtype=np.float64)
+    n_nan = np.zeros(n_out, dtype=np.int64)
+    for k in range(data.shape[0]):
+        v = data[k]
+        j = indices[k]
+        if np.isnan(v):
+            n_nan[j] += 1
+        else:
+            total[j] += v
+    out = np.empty(n_out, dtype=np.float64)
+    for j in numba.prange(n_out):
+        out[j] = total[j] / (divisor - n_nan[j])
+    return out
+
+
 def _sparse_nanmean(x: CSBase, /, axis: Literal[0, 1]) -> NDArray[np.float64]:
-    """np.nanmean equivalent for sparse matrices."""
+    """np.nanmean equivalent for sparse matrices.
+
+    Computed in a single pass over the stored values, avoiding the two matrix
+    copies and the sparse set-index/``eliminate_zeros`` the previous
+    implementation needed. Implicit (structural) zeros count as observed
+    values; only stored ``NaN`` entries are excluded, matching ``np.nanmean``
+    on the densified array.
+    """
     if not isinstance(x, CSBase):
         msg = "X must be a compressed sparse matrix"
         raise TypeError(msg)
 
-    # count the number of nan elements per row/column (dep. on axis)
-    z = x.copy()
-    z.data = np.isnan(z.data)
-    z.eliminate_zeros()
-    n_elements = z.shape[axis] - z.sum(axis)
-
-    # set the nans to 0, so that a normal .sum() works
-    y = x.copy()
-    y.data[np.isnan(y.data)] = 0
-    y.eliminate_zeros()
-
-    # the average
-    s = y.sum(axis, dtype="float64")  # float64 for score_genes function compatibility)
-    m = s / n_elements
-
-    return m
+    n_out = x.shape[1 - axis]  # one result per row (axis=1) or per column (axis=0)
+    divisor = x.shape[axis]  # observable positions along the reduced axis
+    # the compressed (major) axis stores rows for CSR and columns for CSC
+    reduce_within_slot = (x.format == "csr") == (axis == 1)
+    if reduce_within_slot:
+        return _sparse_nanmean_within_slot(x.data, x.indptr, n_out, divisor)
+    return _sparse_nanmean_across_slots(x.data, x.indices, n_out, divisor)
 
 
 @_doc_params(rng=doc_rng)

diff --git a/tests/test_score_genes.py b/tests/test_score_genes.py
@@ -153,6 +153,20 @@ def test_sparse_nanmean_on_dense_matrix():
         _sparse_nanmean(data, 0)
 
 
+@pytest.mark.parametrize("axis", [0, 1])
+def test_sparse_nanmean_csc(axis: Literal[0, 1]) -> None:
+    """_sparse_nanmean is equivalent to np.nanmean for CSC input too."""
+    from scanpy.tools._score_genes import _sparse_nanmean
+
+    arr = conv.to_dense(
+        _create_sparse_nan_matrix(60, 50, percent_zero=0.3, percent_nan=0.3)
+    )
+    mat = sparse.csc_matrix(arr)  # noqa: TID251
+    np.testing.assert_allclose(
+        np.nanmean(arr, axis), np.array(_sparse_nanmean(mat, axis)).flatten()
+    )
+
+
 def test_score_genes_sparse_vs_dense():
     """score_genes() should give the same result for dense and sparse matrices."""
     adata_sparse = _create_adata(100, 1000, p_zero=0.3, p_nan=0.3)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Speed up {func}`~scanpy.tl.score_genes` by computing the sparse `nanmean` in a single pass instead of copying the matrix twice {smaller}`L Darrow`