scverse · Intron7 · May 27, 2026 · May 23, 2026 · May 23, 2026 · May 26, 2026
diff --git a/docs/release-notes/0.15.2.md b/docs/release-notes/0.15.2.md
@@ -3,3 +3,7 @@
 ```{rubric} Features
 ```
 * Add pseudobulk based distance metrics to {class}`~rapids_singlecell.ptg.Distance`: ``euclidean``, ``root_mean_squared_error``, ``mse``, ``mean_absolute_error``, ``pearson_distance``, ``cosine_distance``, ``r2_distance``. Matches ``pertpy.tl.Distance`` {pr}`676` {smaller}`S Dicks`
+
+```{rubric} Bug fixes
+```
+* Fixes per-batch clip threshold in `pp.highly_variable_genes(flavor="pearson_residuals", batch_key=...)`: each batch now uses its own `sqrt(n_cells_in_batch)` instead of silently reusing the first batch's value {pr}`674` {smaller}`A Mikaeili`
diff --git a/src/rapids_singlecell/preprocessing/_hvg/_pearson_residuals.py b/src/rapids_singlecell/preprocessing/_hvg/_pearson_residuals.py
@@ -73,13 +73,15 @@ def _highly_variable_pearson_residuals(
         X_batch = X_batch[:, nonzero_genes]
         if clip is None:
             n = X_batch.shape[0]
-            clip = cp.sqrt(n, dtype=dtype)
-        if clip < 0:
+            clip_batch = cp.sqrt(n, dtype=dtype)
+        else:
+            clip_batch = clip
+        if clip_batch < 0:
             raise ValueError("Pearson residuals require `clip>=0` or `clip=None`.")
 
         n_cells = X_batch.shape[0]
         n_genes = X_batch.shape[1]
-        clip_val = float(clip)
+        clip_val = float(clip_batch)
         inv_theta = 1.0 / theta
         residual_gene_var = cp.zeros(n_genes, dtype=dtype, order="C")
         stream = cp.cuda.get_current_stream().ptr

diff --git a/tests/test_hvg.py b/tests/test_hvg.py
@@ -397,6 +397,43 @@ def test_highly_variable_genes_pearson_residuals_batch(n_top_genes, dtype):
     assert len(cudata.var) == n_genes
 
 
+def test_pearson_residuals_batch_order_invariant():
+    """HVG ranking must not depend on alphabetical batch-label order."""
+    rng = np.random.default_rng(0)
+    n_big, n_small, n_genes = 5000, 200, 200
+    counts = (rng.random((n_big + n_small, n_genes)) < 0.05).astype(np.int32)
+    counts *= rng.integers(1, 31, size=counts.shape, dtype=np.int32)
+    X = csr_matrix(counts.astype(np.float32))
+
+    a1 = AnnData(X=cpx.scipy.sparse.csr_matrix(X.copy()))
+    a1.obs["batch"] = np.array(["A"] * n_big + ["B"] * n_small)
+    a1.obs["batch"] = a1.obs["batch"].astype("category")
+    rsc.pp.highly_variable_genes(
+        a1,
+        flavor="pearson_residuals",
+        n_top_genes=100,
+        batch_key="batch",
+        check_values=False,
+    )
+
+    a2 = AnnData(X=cpx.scipy.sparse.csr_matrix(X.copy()))
+    a2.obs["batch"] = np.array(["B"] * n_big + ["A"] * n_small)
+    a2.obs["batch"] = a2.obs["batch"].astype("category")
+    rsc.pp.highly_variable_genes(
+        a2,
+        flavor="pearson_residuals",
+        n_top_genes=100,
+        batch_key="batch",
+        check_values=False,
+    )
+
+    np.testing.assert_allclose(
+        a1.var["residual_variances"].to_numpy(),
+        a2.var["residual_variances"].to_numpy(),
+        atol=1e-5,
+    )
+
+
 @pytest.mark.parametrize("dtype", ["float32", "float64"])
 @pytest.mark.parametrize("sparse", [True, False])
 def test_poisson_gene_selection_compare_to_scvi(dtype, sparse):