From d06bb20d6faa5c5ea734fb48b188538d931acb83 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Tue, 19 May 2026 11:38:42 +0200
Subject: [PATCH 01/19] Add Symphony integration via symphonypy.

Introduces the SYMPHONY_INTEGRATE module and wires it into INTEGRATE as the
`symphony` option of integration_methods, with nf-test coverage and docs.
Requires symphonypy 0.2.3 for harmonypy 0.2 compatibility (symphonypy#9).
---
 README.md                                     |  1 +
 assets/multiqc_config.yml                     |  6 +-
 conf/modules.config                           |  9 +++
 docs/output.md                                |  1 +
 docs/reproducibility.md                       | 35 ++++-----
 .../local/symphony/integrate/environment.yml  |  9 +++
 modules/local/symphony/integrate/main.nf      | 34 +++++++++
 .../symphony/integrate/templates/integrate.py | 61 +++++++++++++++
 .../symphony/integrate/tests/main.nf.test     | 70 +++++++++++++++++
 .../integrate/tests/main.nf.test.snap         | 41 ++++++++++
 nextflow_schema.json                          |  6 +-
 subworkflows/local/integrate/main.nf          | 12 +++
 .../local/integrate/tests/main.nf.test        | 75 +++++++++++++++++++
 .../local/integrate/tests/main.nf.test.snap   | 55 ++++++++++++++
 14 files changed, 393 insertions(+), 22 deletions(-)
 create mode 100644 modules/local/symphony/integrate/environment.yml
 create mode 100644 modules/local/symphony/integrate/main.nf
 create mode 100644 modules/local/symphony/integrate/templates/integrate.py
 create mode 100644 modules/local/symphony/integrate/tests/main.nf.test
 create mode 100644 modules/local/symphony/integrate/tests/main.nf.test.snap

diff --git a/README.md b/README.md
index e531a984..c620552c 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ Steps marked with the boat icon are not yet implemented. For the other steps, th
       - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)
       - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)
       - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)
+      - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/))
       - [BBKNN](https://github.com/Teichlab/bbknn)
       - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)
       - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index bf1be27f..1af23e69 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -20,10 +20,12 @@ report_section_order:
     order: -1006
   "harmony":
     order: -1007
-  "bbknn":
+  "symphony":
     order: -1008
-  "combat":
+  "bbknn":
     order: -1009
+  "combat":
+    order: -1010
   # If new tools are add. They need to be added here
   "nf-core-scdownstream-methods-description":
     order: -2001
diff --git a/conf/modules.config b/conf/modules.config
index 84748e8e..cf2d1c0a 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -388,6 +388,15 @@ process {
         ]
     }
 
+    withName: SYMPHONY_INTEGRATE { // publishing rules for the Symphony integration outputs
+        publishDir = [
+            path: { "${params.outdir}/combine/integrate/symphony" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_intermediates, // published only when save_intermediates is set
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, // never publish versions.yml
+        ]
+    }
+
     withName: SCANPY_BBKNN {
         publishDir = [
             path: { "${params.outdir}/combine/integrate/bbknn" },
diff --git a/docs/output.md b/docs/output.md
index b35ab9d4..cc52a656 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -35,6 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
       - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)
       - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)
       - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)
+      - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/))
       - [BBKNN](https://github.com/Teichlab/bbknn)
       - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)
       - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)
diff --git a/docs/reproducibility.md b/docs/reproducibility.md
index 2e2e4312..cba5a394 100644
--- a/docs/reproducibility.md
+++ b/docs/reproducibility.md
@@ -120,23 +120,24 @@ The **Test strategy (this branch)** column describes what the tests on this bran
 
 ### `scanpy/`
 
-| Module                   | Description                                                                                                    | Reproducibility                                                                                                                                                              | Test strategy (this branch)                                                                    |
-| ------------------------ | -------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- |
-| `scanpy/bbknn`           | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding.                              | Fully deterministic — kNN construction is deterministic given the input embedding.                                                                                           | structural — versions + schema only                                                            |
-| `scanpy/cellcycle`       | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase.                  | Fully deterministic                                                                                                                                                          | hash                                                                                           |
-| `scanpy/combat`          | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`.                              | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends.                                                               | structural — versions + schema only                                                            |
-| `scanpy/filter`          | Filters cells and genes by count, gene, and mitochondrial percentage thresholds.                               | Fully deterministic                                                                                                                                                          | hash + structural — standard `hash` triple; multiple parameter scenarios                       |
-| `scanpy/harmony`         | Runs Harmony batch integration after log-normalisation and PCA, storing the corrected embedding as `X_emb`.    | **Non-deterministic** — Harmony is an iterative optimisation with no fixed seed; upstream PCA is also unseeded.                                                              | structural — versions + schema only; `variance_ratio` output removed                           |
-| `scanpy/hvgs`            | Selects highly variable genes and subsets the AnnData to those genes.                                          | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions.    | structural — versions + schema only                                                            |
-| `scanpy/leiden`          | Performs Leiden community-detection clustering at a specified resolution.                                      | **Non-deterministic** — Leiden uses random restarts with no fixed seed.                                                                                                      | structural — range assertion on cluster count + versions + schema                              |
-| `scanpy/neighbors`       | Computes a k-nearest-neighbour graph on a specified embedding.                                                 | Fully deterministic given a fixed input embedding.                                                                                                                           | structural — versions + schema only                                                            |
-| `scanpy/paga`            | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot.                                  | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels.                                                                            | hash                                                                                           |
-| `scanpy/pca`             | Runs PCA with `random_state=0` and stores the result under a specified key.                                    | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends.                                                                   | structural — versions + schema only                                                            |
-| `scanpy/plotqc`          | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC.                                 | Fully deterministic                                                                                                                                                          | hash (no H5AD output — PNG / MultiQC JSON + versions)                                          |
-| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method. | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions. | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only** |
-| `scanpy/readh5`          | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD.                                       | Fully deterministic                                                                                                                                                          | hash                                                                                           |
-| `scanpy/sample`          | Down-samples cells to a fixed count or fraction using `rng=0`.                                                 | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions.                                                                           | hash                                                                                           |
-| `scanpy/umap`            | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`.                             | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions.                                                                   | structural — versions + schema only                                                            |
+| Module                   | Description                                                                                                                                                                                                                                                                                                                  | Reproducibility                                                                                                                                                              | Test strategy (this branch)                                                                                           |
+| ------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `scanpy/bbknn`           | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding.                                                                                                                                                                                                                                            | Fully deterministic — kNN construction is deterministic given the input embedding.                                                                                           | structural — versions + schema only                                                                                   |
+| `scanpy/cellcycle`       | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase.                                                                                                                                                                                                                                | Fully deterministic                                                                                                                                                          | hash                                                                                                                  |
+| `scanpy/combat`          | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`.                                                                                                                                                                                                                                            | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends.                                                               | structural — versions + schema only                                                                                   |
+| `scanpy/filter`          | Filters cells and genes by count, gene, and mitochondrial percentage thresholds.                                                                                                                                                                                                                                             | Fully deterministic                                                                                                                                                          | hash + structural — standard `hash` triple; multiple parameter scenarios                                              |
+| `scanpy/harmony`         | Runs Harmony batch integration after log-normalisation and PCA, storing the corrected embedding as `X_emb`.                                                                                                                                                                                                                  | **Non-deterministic** — Harmony is an iterative optimisation with no fixed seed; upstream PCA is also unseeded.                                                              | structural — versions + schema only; `variance_ratio` output removed                                                  |
+| `symphony/integrate`     | Runs Symphony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — same Harmony backend as `scanpy/harmony`; symphonypy passes `random_seed=1` but upstream PCA is unseeded.                                            | structural — versions + schema only (nf-test blocked until symphonypy 0.2.3 is on PyPI and the Wave image is rebuilt) |
+| `scanpy/hvgs`            | Selects highly variable genes and subsets the AnnData to those genes.                                                                                                                                                                                                                                                        | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions.    | structural — versions + schema only                                                                                   |
+| `scanpy/leiden`          | Performs Leiden community-detection clustering at a specified resolution.                                                                                                                                                                                                                                                    | **Non-deterministic** — Leiden uses random restarts with no fixed seed.                                                                                                      | structural — range assertion on cluster count + versions + schema                                                     |
+| `scanpy/neighbors`       | Computes a k-nearest-neighbour graph on a specified embedding.                                                                                                                                                                                                                                                               | Fully deterministic given a fixed input embedding.                                                                                                                           | structural — versions + schema only                                                                                   |
+| `scanpy/paga`            | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot.                                                                                                                                                                                                                                                | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels.                                                                            | hash                                                                                                                  |
+| `scanpy/pca`             | Runs PCA with `random_state=0` and stores the result under a specified key.                                                                                                                                                                                                                                                  | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends.                                                                   | structural — versions + schema only                                                                                   |
+| `scanpy/plotqc`          | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC.                                                                                                                                                                                                                                               | Fully deterministic                                                                                                                                                          | hash (no H5AD output — PNG / MultiQC JSON + versions)                                                                 |
+| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method.                                                                                                                                                                                                               | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions. | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only**                        |
+| `scanpy/readh5`          | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD.                                                                                                                                                                                                                                                     | Fully deterministic                                                                                                                                                          | hash                                                                                                                  |
+| `scanpy/sample`          | Down-samples cells to a fixed count or fraction using `rng=0`.                                                                                                                                                                                                                                                               | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions.                                                                           | hash                                                                                                                  |
+| `scanpy/umap`            | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`.                                                                                                                                                                                                                                           | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions.                                                                   | structural — versions + schema only                                                                                   |
 
 ### `scimilarity/`
 
diff --git a/modules/local/symphony/integrate/environment.yml b/modules/local/symphony/integrate/environment.yml
new file mode 100644
index 00000000..01ce15ca
--- /dev/null
+++ b/modules/local/symphony/integrate/environment.yml
@@ -0,0 +1,9 @@
+channels:
+  - conda-forge
+dependencies:
+  - conda-forge::python=3.13.12
+  - conda-forge::pyyaml=6.0.3
+  - conda-forge::scanpy=1.12.1
+  - pip
+  - pip: # symphonypy is installed from PyPI
+      - symphonypy==0.2.3 # 0.2.3 is required for harmonypy 0.2 compatibility (symphonypy#9)
diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/integrate/main.nf
new file mode 100644
index 00000000..9c008451
--- /dev/null
+++ b/modules/local/symphony/integrate/main.nf
@@ -0,0 +1,34 @@
+process SYMPHONY_INTEGRATE { // Symphony batch integration of one AnnData file; the work is done by templates/integrate.py
+    tag "${meta.id}"
+    label 'process_medium'
+    // Container: the HTTPS image for Singularity unless ext.singularity_pull_docker_container is set, else the Wave image.
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+            ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/74/748cbcefde497c7024bda26cfe90aacbd7c3f3095084693157542a23de793fe2/data'
+            : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:604f5d2dde5f37d9'}"
+    // batch_col: batch key handed to harmony_integrate; counts_layer: layer copied into X before log1p, or "X" to use X as is.
+    input:
+    tuple val(meta), path(h5ad)
+    val(batch_col)
+    val(counts_layer)
+    // h5ad: input object plus X_pca_symphony, X_emb and uns['symphony']; obsm: pickled X_emb table; versions: tool versions.
+    output:
+    tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
+    path "X_${prefix}.pkl"                 , emit: obsm
+    path "versions.yml"                    , emit: versions, topic: versions
+    // Abort when the output name would equal the staged input name; otherwise render the Python template.
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    if ("${prefix}.h5ad" == "${h5ad}") {
+        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    }
+    template('integrate.py')
+    // Stub: create empty placeholder outputs without running the integration.
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.h5ad
+    touch X_${prefix}.pkl
+    touch versions.yml
+    """
+}
diff --git a/modules/local/symphony/integrate/templates/integrate.py b/modules/local/symphony/integrate/templates/integrate.py
new file mode 100644
index 00000000..edb393b5
--- /dev/null
+++ b/modules/local/symphony/integrate/templates/integrate.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+# Nextflow template for SYMPHONY_INTEGRATE: Harmony integration via symphonypy on a log1p + PCA copy of the input.
+# Disable OpenMP CPU topology detection for MacOS compatibility
+import os
+os.environ["KMP_AFFINITY"] = "disabled"
+
+import platform
+import yaml
+# Relative matplotlib config and numba cache directories; set before scanpy is imported.
+os.environ["MPLCONFIGDIR"] = "./tmp/mpl"
+os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
+
+import scanpy as sc
+import symphonypy as sp
+import pandas as pd
+
+from threadpoolctl import threadpool_limits
+threadpool_limits(int("${task.cpus}"))  # limit native thread pools to the CPUs granted to this task
+
+adata = sc.read_h5ad("${h5ad}")
+
+prefix = "${prefix}"
+# Preprocess a copy so the object written to disk keeps its original X and layers.
+adata_processing = adata.copy()
+# Take the layer from the copy: log1p may run in place, and an array shared with adata would be altered in the output.
+if "${counts_layer}" != "X":
+    adata_processing.X = adata_processing.layers["${counts_layer}"]
+
+sc.pp.log1p(adata_processing)
+sc.pp.pca(adata_processing)
+# Harmony-correct X_pca into X_pca_symphony; uns["harmony"] is read back from the same object below.
+sp.pp.harmony_integrate(
+    adata_processing,
+    key="${batch_col}",
+    flavor="python",
+    ref_basis_source="X_pca",
+    ref_basis_adjusted="X_pca_symphony",
+)
+# Attach the corrected embedding (also as X_emb) and the Harmony results to the original object.
+adata.obsm["X_pca_symphony"] = adata_processing.obsm["X_pca_symphony"]
+adata.obsm["X_emb"] = adata_processing.obsm["X_pca_symphony"]
+adata.uns["symphony"] = adata_processing.uns["harmony"]
+
+adata.write_h5ad(f"{prefix}.h5ad")
+# Export the same embedding as a pickled DataFrame indexed by observation name.
+df = pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names)
+df.to_pickle(f"X_{prefix}.pkl")
+
+# Versions
+
+versions = {
+    "${task.process}": {
+        "python": platform.python_version(),
+        "scanpy": sc.__version__,
+        "symphonypy": sp.__version__,
+        "pandas": pd.__version__,
+    }
+}
+
+with open("versions.yml", "w") as f:
+    yaml.dump(versions, f)
diff --git a/modules/local/symphony/integrate/tests/main.nf.test b/modules/local/symphony/integrate/tests/main.nf.test
new file mode 100644
index 00000000..3fbc1e92
--- /dev/null
+++ b/modules/local/symphony/integrate/tests/main.nf.test
@@ -0,0 +1,70 @@
+nextflow_process {
+
+    name "Test Process SYMPHONY_INTEGRATE"
+    script "modules/local/symphony/integrate/main.nf"
+    process "SYMPHONY_INTEGRATE"
+
+    tag "modules"
+    tag "modules_local"
+
+    test("Should run without failures") {
+        // Real run on the shared scRNA-seq test matrix: batch key "sample", counts taken from X.
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = channel.of([
+                        [ id: 'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
+                input[1] = "sample"
+                input[2] = "X"
+                """
+            }
+        }
+
+        then {
+            assert process.success // checked first: the output access below throws when the process failed
+            def adata = anndata(process.out.h5ad[0][1])
+            assert "X_emb" in adata.obsm
+            assert "symphony" in adata.uns
+            assert snapshot(
+                path(process.out.versions[0]).yaml,
+                adata.yaml
+            ).match()
+        }
+
+    }
+
+    test("Should run without failures - stub") {
+        // Stub run: compares the placeholder outputs against the stored snapshot.
+        options '-stub'
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = channel.of([
+                        [ id: 'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
+                input[1] = "sample"
+                input[2] = "X"
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+
+    }
+
+}
diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap b/modules/local/symphony/integrate/tests/main.nf.test.snap
new file mode 100644
index 00000000..64d6e427
--- /dev/null
+++ b/modules/local/symphony/integrate/tests/main.nf.test.snap
@@ -0,0 +1,41 @@
+{
+    "Should run without failures - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "2": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "obsm": [
+                    "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            }
+        ],
+        "timestamp": "2026-05-19T10:59:10.495670438",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.0"
+        }
+    }
+}
\ No newline at end of file
diff --git a/nextflow_schema.json b/nextflow_schema.json
index be1d18d2..bacbb557 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -152,8 +152,8 @@
                     "type": "string",
                     "default": "scvi",
                     "description": "Specify the tool to use for integration",
-                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, bbknn, combat, seurat, scimilarity, pca, expimap",
-                    "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$"
+                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, symphony, bbknn, combat, seurat, scimilarity, pca, expimap",
+                    "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$"
                 },
                 "integration_hvgs": {
                     "type": "integer",
@@ -228,7 +228,7 @@
                     "type": "string",
                     "description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if `input` is not provided - otherwise it is ignored.",
                     "help_text": "If the `input` parameter is not provided (no new data to add), integration will not be performed. In order to be able to utilize existing integration results, you need to provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).",
-                    "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|bbknn|combat|seurat))*)?$"
+                    "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat))*)?$"
                 }
             }
         },
diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf
index ebcce693..e790bf96 100644
--- a/subworkflows/local/integrate/main.nf
+++ b/subworkflows/local/integrate/main.nf
@@ -3,6 +3,7 @@ include { SCANPY_FILTER      } from '../../../modules/local/scanpy/filter'
 include { SCVITOOLS_SCVI     } from '../../../modules/local/scvitools/scvi'
 include { SCVITOOLS_SCANVI   } from '../../../modules/local/scvitools/scanvi'
 include { SCANPY_HARMONY     } from '../../../modules/local/scanpy/harmony'
+include { SYMPHONY_INTEGRATE } from '../../../modules/local/symphony/integrate'
 include { SCANPY_BBKNN       } from '../../../modules/local/scanpy/bbknn'
 include { SCANPY_COMBAT      } from '../../../modules/local/scanpy/combat'
 include { SCANPY_PCA         } from '../../../modules/local/scanpy/pca'
@@ -115,6 +116,17 @@ workflow INTEGRATE {
         ch_obsm = ch_obsm.mix(SCANPY_HARMONY.out.obsm)
     }
 
+    if (methods.contains('symphony')) {
+        SYMPHONY_INTEGRATE (
+            ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] },
+            "batch",
+            "X"
+        )
+        ch_versions = ch_versions.mix(SYMPHONY_INTEGRATE.out.versions)
+        ch_integrations = ch_integrations.mix(SYMPHONY_INTEGRATE.out.h5ad)
+        ch_obsm = ch_obsm.mix(SYMPHONY_INTEGRATE.out.obsm)
+    }
+
     if (methods.contains('bbknn')) {
         SCANPY_BBKNN (
             ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'bbknn'], h5ad] },
diff --git a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test
index 9562659e..60cb1fd2 100644
--- a/subworkflows/local/integrate/tests/main.nf.test
+++ b/subworkflows/local/integrate/tests/main.nf.test
@@ -82,6 +82,81 @@ nextflow_workflow {
 
     }
 
+    test("Should run without failures - symphony - stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [id: 'test'],
+                    file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true)
+                ])
+                input[1] = false
+                input[2] = 2000
+                input[3] = []
+                input[4] = ['symphony']
+                input[5] = null
+                input[6] = null
+                input[7] = []
+                input[8] = []
+                input[9] = null
+                input[10] = null
+                input[11] = 'condition'
+                """
+            }
+        }
+
+        then {
+            assert workflow.success
+            assert snapshot(workflow.out).match()
+        }
+
+    }
+
+    test("Should run without failures - symphony") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            workflow {
+                """
+                input[0] = channel.of([
+                    [id: 'test'],
+                    file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true)
+                ])
+                input[1] = false
+                input[2] = 2000
+                input[3] = []
+                input[4] = ['symphony']
+                input[5] = null
+                input[6] = null
+                input[7] = []
+                input[8] = []
+                input[9] = null
+                input[10] = null
+                input[11] = 'condition'
+                """
+            }
+        }
+
+        then {
+            def adata = anndata(workflow.out.integrations[0][1])
+            assert workflow.success
+            assert "X_emb" in adata.obsm
+            assert snapshot(
+                workflow.out.versions,
+                adata.yaml
+            ).match()
+        }
+
+    }
+
     test("Should run without failures - bbknn - stub") {
 
         options '-stub'
diff --git a/subworkflows/local/integrate/tests/main.nf.test.snap b/subworkflows/local/integrate/tests/main.nf.test.snap
index 4feacdc8..c3a97012 100644
--- a/subworkflows/local/integrate/tests/main.nf.test.snap
+++ b/subworkflows/local/integrate/tests/main.nf.test.snap
@@ -44,6 +44,61 @@
             "nextflow": "26.04.0"
         }
     },
+    "Should run without failures - symphony - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "symphony"
+                        },
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "integrations": [
+                    [
+                        {
+                            "id": "symphony"
+                        },
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "obs": [
+                    
+                ],
+                "obsm": [
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "var": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            }
+        ],
+        "timestamp": "2026-05-19T11:01:29.131789506",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.0"
+        }
+    },
     "Should run without failures - bbknn - stub": {
         "content": [
             {

From 9177225030754ecfaed8e22f3575659768ace90f Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Tue, 26 May 2026 17:19:19 +0200
Subject: [PATCH 02/19] Fix Symphony module container build and version
 reporting.

Update symphonypy to 0.2.4 with a rebuilt Wave image, and use importlib.metadata for package versions since symphonypy does not expose __version__.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../local/symphony/integrate/environment.yml  |  5 +-
 modules/local/symphony/integrate/main.nf      |  4 +-
 .../symphony/integrate/templates/integrate.py |  5 +-
 .../integrate/tests/main.nf.test.snap         | 52 +++++++++++++++++++
 4 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/modules/local/symphony/integrate/environment.yml b/modules/local/symphony/integrate/environment.yml
index 01ce15ca..1e4070eb 100644
--- a/modules/local/symphony/integrate/environment.yml
+++ b/modules/local/symphony/integrate/environment.yml
@@ -1,9 +1,10 @@
 channels:
   - conda-forge
+  - bioconda
 dependencies:
-  - conda-forge::python=3.13.12
+  - conda-forge::python=3.13.13
   - conda-forge::pyyaml=6.0.3
   - conda-forge::scanpy=1.12.1
   - pip
   - pip:
-      - symphonypy==0.2.3
+      - symphonypy==0.2.4
diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/integrate/main.nf
index 9c008451..7eb4898f 100644
--- a/modules/local/symphony/integrate/main.nf
+++ b/modules/local/symphony/integrate/main.nf
@@ -4,8 +4,8 @@ process SYMPHONY_INTEGRATE {
 
     conda "${moduleDir}/environment.yml"
     container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
-            ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/74/748cbcefde497c7024bda26cfe90aacbd7c3f3095084693157542a23de793fe2/data'
-            : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:604f5d2dde5f37d9'}"
+            ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/51/512121548a21b4d1bb8acfd5e30a75c5c2103ddd00cf1de4713c682b7e6b5387/data'
+            : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:2198c27c5c9392d5'}"
 
     input:
     tuple val(meta), path(h5ad)
diff --git a/modules/local/symphony/integrate/templates/integrate.py b/modules/local/symphony/integrate/templates/integrate.py
index edb393b5..32de73b5 100644
--- a/modules/local/symphony/integrate/templates/integrate.py
+++ b/modules/local/symphony/integrate/templates/integrate.py
@@ -4,6 +4,7 @@
 import os
 os.environ["KMP_AFFINITY"] = "disabled"
 
+import importlib.metadata
 import platform
 import yaml
 
@@ -51,8 +52,8 @@
 versions = {
     "${task.process}": {
         "python": platform.python_version(),
-        "scanpy": sc.__version__,
-        "symphonypy": sp.__version__,
+        "scanpy": importlib.metadata.version("scanpy"),
+        "symphonypy": importlib.metadata.version("symphonypy"),
         "pandas": pd.__version__,
     }
 }
diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap b/modules/local/symphony/integrate/tests/main.nf.test.snap
index 64d6e427..39b39257 100644
--- a/modules/local/symphony/integrate/tests/main.nf.test.snap
+++ b/modules/local/symphony/integrate/tests/main.nf.test.snap
@@ -37,5 +37,57 @@
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
         }
+    },
+    "Should run without failures": {
+        "content": [
+            {
+                "SYMPHONY_INTEGRATE": {
+                    "pandas": "2.3.3",
+                    "python": "3.13.13",
+                    "scanpy": "1.12.1",
+                    "symphonypy": "0.2.4"
+                }
+            },
+            {
+                "n_obs": 38234,
+                "n_vars": 9887,
+                "obs": {
+                    "index": "_index",
+                    "columns": [
+                        "sample"
+                    ]
+                },
+                "var": {
+                    "index": "_index",
+                    "columns": [
+                        
+                    ]
+                },
+                "layers": [
+                    
+                ],
+                "obsm": [
+                    "X_emb",
+                    "X_pca_symphony"
+                ],
+                "varm": [
+                    
+                ],
+                "obsp": [
+                    
+                ],
+                "varp": [
+                    
+                ],
+                "uns": [
+                    "symphony"
+                ]
+            }
+        ],
+        "timestamp": "2026-05-26T17:19:00.719609",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "26.04.2"
+        }
     }
 }
\ No newline at end of file

From 1f038c6f82729d58b0b4cb6044093afcbb3bdbf8 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Tue, 26 May 2026 17:38:01 +0200
Subject: [PATCH 03/19] Rename Symphony process to SYMPHONY_HARMONYINTEGRATE.

Align the process name with symphonypy's harmony_integrate entry point and update references and nf-test snapshots.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 conf/modules.config                                    |  2 +-
 modules/local/symphony/integrate/main.nf               |  2 +-
 modules/local/symphony/integrate/tests/main.nf.test    |  4 ++--
 .../local/symphony/integrate/tests/main.nf.test.snap   |  4 ++--
 subworkflows/local/integrate/main.nf                   | 10 +++++-----
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index cf2d1c0a..5d1e3ec8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -388,7 +388,7 @@ process {
         ]
     }
 
-    withName: SYMPHONY_INTEGRATE {
+    withName: SYMPHONY_HARMONYINTEGRATE {
         publishDir = [
             path: { "${params.outdir}/combine/integrate/symphony" },
             mode: params.publish_dir_mode,
diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/integrate/main.nf
index 7eb4898f..93a0e116 100644
--- a/modules/local/symphony/integrate/main.nf
+++ b/modules/local/symphony/integrate/main.nf
@@ -1,4 +1,4 @@
-process SYMPHONY_INTEGRATE {
+process SYMPHONY_HARMONYINTEGRATE {
     tag "${meta.id}"
     label 'process_medium'
 
diff --git a/modules/local/symphony/integrate/tests/main.nf.test b/modules/local/symphony/integrate/tests/main.nf.test
index 3fbc1e92..ea5cd9d0 100644
--- a/modules/local/symphony/integrate/tests/main.nf.test
+++ b/modules/local/symphony/integrate/tests/main.nf.test
@@ -1,8 +1,8 @@
 nextflow_process {
 
-    name "Test Process SYMPHONY_INTEGRATE"
+    name "Test Process SYMPHONY_HARMONYINTEGRATE"
     script "modules/local/symphony/integrate/main.nf"
-    process "SYMPHONY_INTEGRATE"
+    process "SYMPHONY_HARMONYINTEGRATE"
 
     tag "modules"
     tag "modules_local"
diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap b/modules/local/symphony/integrate/tests/main.nf.test.snap
index 39b39257..6b03b90a 100644
--- a/modules/local/symphony/integrate/tests/main.nf.test.snap
+++ b/modules/local/symphony/integrate/tests/main.nf.test.snap
@@ -41,7 +41,7 @@
     "Should run without failures": {
         "content": [
             {
-                "SYMPHONY_INTEGRATE": {
+                "SYMPHONY_HARMONYINTEGRATE": {
                     "pandas": "2.3.3",
                     "python": "3.13.13",
                     "scanpy": "1.12.1",
@@ -84,7 +84,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-26T17:19:00.719609",
+        "timestamp": "2026-05-26T17:36:49.568823",
         "meta": {
             "nf-test": "0.9.5",
             "nextflow": "26.04.2"
diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf
index e790bf96..b89d4f72 100644
--- a/subworkflows/local/integrate/main.nf
+++ b/subworkflows/local/integrate/main.nf
@@ -3,7 +3,7 @@ include { SCANPY_FILTER      } from '../../../modules/local/scanpy/filter'
 include { SCVITOOLS_SCVI     } from '../../../modules/local/scvitools/scvi'
 include { SCVITOOLS_SCANVI   } from '../../../modules/local/scvitools/scanvi'
 include { SCANPY_HARMONY     } from '../../../modules/local/scanpy/harmony'
-include { SYMPHONY_INTEGRATE } from '../../../modules/local/symphony/integrate'
+include { SYMPHONY_HARMONYINTEGRATE } from '../../../modules/local/symphony/integrate'
 include { SCANPY_BBKNN       } from '../../../modules/local/scanpy/bbknn'
 include { SCANPY_COMBAT      } from '../../../modules/local/scanpy/combat'
 include { SCANPY_PCA         } from '../../../modules/local/scanpy/pca'
@@ -117,14 +117,14 @@ workflow INTEGRATE {
     }
 
     if (methods.contains('symphony')) {
-        SYMPHONY_INTEGRATE (
+        SYMPHONY_HARMONYINTEGRATE (
             ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] },
             "batch",
             "X"
         )
-        ch_versions = ch_versions.mix(SYMPHONY_INTEGRATE.out.versions)
-        ch_integrations = ch_integrations.mix(SYMPHONY_INTEGRATE.out.h5ad)
-        ch_obsm = ch_obsm.mix(SYMPHONY_INTEGRATE.out.obsm)
+        ch_versions = ch_versions.mix(SYMPHONY_HARMONYINTEGRATE.out.versions)
+        ch_integrations = ch_integrations.mix(SYMPHONY_HARMONYINTEGRATE.out.h5ad)
+        ch_obsm = ch_obsm.mix(SYMPHONY_HARMONYINTEGRATE.out.obsm)
     }
 
     if (methods.contains('bbknn')) {

From 1808682450196a185cd253a984580a42c378269a Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Tue, 26 May 2026 19:52:28 +0200
Subject: [PATCH 04/19] Retire scanpy/harmony in favor of
 symphony/harmonyintegrate.

Route the harmony integration method through SYMPHONY_HARMONYINTEGRATE, remove the duplicate symphony option, and delete the old scanpy/harmony module.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 README.md                                     |   3 +-
 assets/multiqc_config.yml                     |   2 -
 conf/modules.config                           |  11 +-
 docs/output.md                                |   3 +-
 docs/reproducibility.md                       |   8 +-
 modules/local/scanpy/harmony/environment.yml  |   8 -
 modules/local/scanpy/harmony/main.nf          |  34 ----
 .../local/scanpy/harmony/templates/harmony.py |  71 --------
 .../local/scanpy/harmony/tests/main.nf.test   |  69 -------
 .../scanpy/harmony/tests/main.nf.test.snap    |  92 ----------
 .../environment.yml                           |   0
 .../{integrate => harmonyintegrate}/main.nf   |  10 +-
 .../templates/harmonyintegrate.py}            |   0
 .../tests/main.nf.test                        |   3 +-
 .../tests/main.nf.test.snap                   |   0
 nextflow_schema.json                          |   6 +-
 subworkflows/local/integrate/main.nf          |  56 +++---
 .../local/integrate/tests/main.nf.test        |  77 +-------
 .../local/integrate/tests/main.nf.test.snap   | 168 ++++++++++--------
 19 files changed, 150 insertions(+), 471 deletions(-)
 delete mode 100644 modules/local/scanpy/harmony/environment.yml
 delete mode 100644 modules/local/scanpy/harmony/main.nf
 delete mode 100644 modules/local/scanpy/harmony/templates/harmony.py
 delete mode 100644 modules/local/scanpy/harmony/tests/main.nf.test
 delete mode 100644 modules/local/scanpy/harmony/tests/main.nf.test.snap
 rename modules/local/symphony/{integrate => harmonyintegrate}/environment.yml (100%)
 rename modules/local/symphony/{integrate => harmonyintegrate}/main.nf (72%)
 rename modules/local/symphony/{integrate/templates/integrate.py => harmonyintegrate/templates/harmonyintegrate.py} (100%)
 rename modules/local/symphony/{integrate => harmonyintegrate}/tests/main.nf.test (94%)
 rename modules/local/symphony/{integrate => harmonyintegrate}/tests/main.nf.test.snap (100%)

diff --git a/README.md b/README.md
index c620552c..8433a1fd 100644
--- a/README.md
+++ b/README.md
@@ -58,8 +58,7 @@ Steps marked with the boat icon are not yet implemented. For the other steps, th
    3. Integration
       - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)
       - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)
-      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)
-      - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/))
+      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/))
       - [BBKNN](https://github.com/Teichlab/bbknn)
       - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)
       - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 1af23e69..945264e4 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -20,8 +20,6 @@ report_section_order:
     order: -1006
   "harmony":
     order: -1007
-  "symphony":
-    order: -1008
   "bbknn":
     order: -1009
   "combat":
diff --git a/conf/modules.config b/conf/modules.config
index 5d1e3ec8..41dacb79 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -379,18 +379,9 @@ process {
         ]
     }
 
-    withName: SCANPY_HARMONY {
-        publishDir = [
-            path: { "${params.outdir}/combine/integrate/harmony" },
-            mode: params.publish_dir_mode,
-            enabled: params.save_intermediates,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
-        ]
-    }
-
     withName: SYMPHONY_HARMONYINTEGRATE {
         publishDir = [
-            path: { "${params.outdir}/combine/integrate/symphony" },
+            path: { "${params.outdir}/combine/integrate/harmony" },
             mode: params.publish_dir_mode,
             enabled: params.save_intermediates,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
diff --git a/docs/output.md b/docs/output.md
index cc52a656..ca948532 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -34,8 +34,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
    3. Integration
       - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)
       - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)
-      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)
-      - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/))
+      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/))
       - [BBKNN](https://github.com/Teichlab/bbknn)
       - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)
       - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)
diff --git a/docs/reproducibility.md b/docs/reproducibility.md
index cba5a394..814322dc 100644
--- a/docs/reproducibility.md
+++ b/docs/reproducibility.md
@@ -126,8 +126,6 @@ The **Test strategy (this branch)** column describes what the tests on this bran
 | `scanpy/cellcycle`       | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase.                                                                                                                                                                                                                                | Fully deterministic                                                                                                                                                          | hash                                                                                                                  |
 | `scanpy/combat`          | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`.                                                                                                                                                                                                                                            | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends.                                                               | structural — versions + schema only                                                                                   |
 | `scanpy/filter`          | Filters cells and genes by count, gene, and mitochondrial percentage thresholds.                                                                                                                                                                                                                                             | Fully deterministic                                                                                                                                                          | hash + structural — standard `hash` triple; multiple parameter scenarios                                              |
-| `scanpy/harmony`         | Runs Harmony batch integration after log-normalisation and PCA, storing the corrected embedding as `X_emb`.                                                                                                                                                                                                                  | **Non-deterministic** — Harmony is an iterative optimisation with no fixed seed; upstream PCA is also unseeded.                                                              | structural — versions + schema only; `variance_ratio` output removed                                                  |
-| `symphony/integrate`     | Runs Symphony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — same Harmony backend as `scanpy/harmony`; symphonypy passes `random_seed=1` but upstream PCA is unseeded.                                            | structural — versions + schema only (nf-test blocked until symphonypy 0.2.3 is on PyPI and the Wave image is rebuilt) |
 | `scanpy/hvgs`            | Selects highly variable genes and subsets the AnnData to those genes.                                                                                                                                                                                                                                                        | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions.    | structural — versions + schema only                                                                                   |
 | `scanpy/leiden`          | Performs Leiden community-detection clustering at a specified resolution.                                                                                                                                                                                                                                                    | **Non-deterministic** — Leiden uses random restarts with no fixed seed.                                                                                                      | structural — range assertion on cluster count + versions + schema                                                     |
 | `scanpy/neighbors`       | Computes a k-nearest-neighbour graph on a specified embedding.                                                                                                                                                                                                                                                               | Fully deterministic given a fixed input embedding.                                                                                                                           | structural — versions + schema only                                                                                   |
@@ -139,6 +137,12 @@ The **Test strategy (this branch)** column describes what the tests on this bran
 | `scanpy/sample`          | Down-samples cells to a fixed count or fraction using `rng=0`.                                                                                                                                                                                                                                                               | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions.                                                                           | hash                                                                                                                  |
 | `scanpy/umap`            | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`.                                                                                                                                                                                                                                           | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions.                                                                   | structural — versions + schema only                                                                                   |
 
+### `symphony/`
+
+| Module                         | Description                                                                                                                                                                                                                                                                                                                  | Reproducibility                                                                                                                                   | Test strategy (this branch)         |
+| ------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- |
+| `symphony/harmonyintegrate`    | Runs Harmony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded.                     | structural — versions + schema only |
+
 ### `scimilarity/`
 
 | Module                   | Description                                                                                                   | Reproducibility                                                                                                                                                                                                     | Test strategy (this branch) |
diff --git a/modules/local/scanpy/harmony/environment.yml b/modules/local/scanpy/harmony/environment.yml
deleted file mode 100644
index c048fcad..00000000
--- a/modules/local/scanpy/harmony/environment.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-channels:
-  - conda-forge
-  - bioconda
-dependencies:
-  - conda-forge::python=3.13.12
-  - bioconda::harmonypy=0.2.0
-  - conda-forge::pyyaml=6.0.3
-  - conda-forge::scanpy=1.12
diff --git a/modules/local/scanpy/harmony/main.nf b/modules/local/scanpy/harmony/main.nf
deleted file mode 100644
index 315d1553..00000000
--- a/modules/local/scanpy/harmony/main.nf
+++ /dev/null
@@ -1,34 +0,0 @@
-process SCANPY_HARMONY {
-    tag "${meta.id}"
-    label 'process_medium'
-
-    conda "${moduleDir}/environment.yml"
-    container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container
-            ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/45/45339bf761a2cf0cdb058492bc37f3df8b05b363731d491d1d3a14e9ba0b8f55/data'
-            : 'community.wave.seqera.io/library/harmonypy_anndata_leidenalg_numpy_pruned:43066d5f86f18261'}"
-
-    input:
-    tuple val(meta), path(h5ad)
-    val(batch_col)
-    val(counts_layer)
-
-    output:
-    tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
-    path "X_${prefix}.pkl"                 , emit: obsm
-    path "versions.yml"                    , emit: versions, topic: versions
-
-    script:
-    prefix = task.ext.prefix ?: "${meta.id}"
-    if ("${prefix}.h5ad" == "${h5ad}") {
-        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
-    }
-    template('harmony.py')
-
-    stub:
-    prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.h5ad
-    touch X_${prefix}.pkl
-    touch versions.yml
-    """
-}
diff --git a/modules/local/scanpy/harmony/templates/harmony.py b/modules/local/scanpy/harmony/templates/harmony.py
deleted file mode 100644
index 61b307ef..00000000
--- a/modules/local/scanpy/harmony/templates/harmony.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-
-# Disable OpenMP CPU topology detection for MacOS compatibility
-import os
-os.environ["KMP_AFFINITY"] = "disabled"
-
-import platform
-import yaml
-
-os.environ["MPLCONFIGDIR"] = "./tmp/mpl"
-os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
-
-import harmonypy
-import scanpy as sc
-import pandas as pd
-
-from threadpoolctl import threadpool_limits
-threadpool_limits(int("${task.cpus}"))
-
-adata = sc.read_h5ad("${h5ad}")
-
-prefix = "${prefix}"
-
-adata_processing = adata.copy()
-
-if "${counts_layer}" != "X":
-    adata_processing.X = adata.layers["${counts_layer}"]
-
-sc.pp.log1p(adata_processing)
-sc.pp.pca(adata_processing)
-
-harmony_out = harmonypy.run_harmony(
-    adata_processing.obsm["X_pca"].astype("float64"),
-    adata_processing.obs,
-    "${batch_col}",
-)
-
-emb = harmony_out.Z_corr
-
-# harmonypy 0.2.0 changed Z_corr orientation; accept either layout.
-# See https://github.com/potulabe/symphonypy/issues/8
-if emb.shape == adata_processing.obsm["X_pca"].shape:
-    adata_processing.obsm["X_emb"] = emb
-elif emb.T.shape == adata_processing.obsm["X_pca"].shape:
-    adata_processing.obsm["X_emb"] = emb.T
-else:
-    raise ValueError(
-        f"Unexpected Harmony embedding shape {emb.shape}; "
-        f"expected {adata_processing.obsm['X_pca'].shape} or its transpose."
-    )
-
-adata.obsm["X_emb"] = adata_processing.obsm["X_emb"]
-
-adata.write_h5ad(f"{prefix}.h5ad")
-
-df = pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names)
-df.to_pickle(f"X_{prefix}.pkl")
-
-# Versions
-
-versions = {
-    "${task.process}": {
-        "python": platform.python_version(),
-        "scanpy": sc.__version__,
-        "harmonypy": harmonypy.__version__,
-        "pandas": pd.__version__
-    }
-}
-
-with open("versions.yml", "w") as f:
-    yaml.dump(versions, f)
diff --git a/modules/local/scanpy/harmony/tests/main.nf.test b/modules/local/scanpy/harmony/tests/main.nf.test
deleted file mode 100644
index 8c6b2478..00000000
--- a/modules/local/scanpy/harmony/tests/main.nf.test
+++ /dev/null
@@ -1,69 +0,0 @@
-nextflow_process {
-
-    name "Test Process SCANPY_HARMONY"
-    script "modules/local/scanpy/harmony/main.nf"
-    process "SCANPY_HARMONY"
-
-    tag "modules"
-    tag "modules_local"
-
-    test("Should run without failures") {
-
-        when {
-            params {
-                outdir = "$outputDir"
-            }
-            process {
-                """
-                input[0] = channel.of([
-                        [ id: 'test' ],
-                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
-                    ]
-                )
-                input[1] = "sample"
-                input[2] = "X"
-                """
-            }
-        }
-
-        then {
-            def adata = anndata(process.out.h5ad[0][1])
-            assert process.success
-            assert "X_emb" in adata.obsm
-            assert snapshot(
-                path(process.out.versions[0]).yaml,
-                adata.yaml
-            ).match()
-        }
-
-    }
-
-    test("Should run without failures - stub") {
-
-        options '-stub'
-
-        when {
-            params {
-                outdir = "$outputDir"
-            }
-            process {
-                """
-                input[0] = channel.of([
-                        [ id: 'test' ],
-                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
-                    ]
-                )
-                input[1] = "sample"
-                input[2] = "X"
-                """
-            }
-        }
-
-        then {
-            assert process.success
-            assert snapshot(process.out).match()
-        }
-
-    }
-
-}
diff --git a/modules/local/scanpy/harmony/tests/main.nf.test.snap b/modules/local/scanpy/harmony/tests/main.nf.test.snap
deleted file mode 100644
index 4a71e69d..00000000
--- a/modules/local/scanpy/harmony/tests/main.nf.test.snap
+++ /dev/null
@@ -1,92 +0,0 @@
-{
-    "Should run without failures - stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "2": [
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "h5ad": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "obsm": [
-                    "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "versions": [
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            }
-        ],
-        "timestamp": "2026-03-22T10:56:43.269700775",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
-        }
-    },
-    "Should run without failures": {
-        "content": [
-            {
-                "SCANPY_HARMONY": {
-                    "harmonypy": "0.2.0",
-                    "pandas": "2.3.3",
-                    "python": "3.13.12",
-                    "scanpy": "1.12"
-                }
-            },
-            {
-                "n_obs": 38234,
-                "n_vars": 9887,
-                "obs": {
-                    "index": "_index",
-                    "columns": [
-                        "sample"
-                    ]
-                },
-                "var": {
-                    "index": "_index",
-                    "columns": [
-                        
-                    ]
-                },
-                "layers": [
-                    
-                ],
-                "obsm": [
-                    "X_emb"
-                ],
-                "varm": [
-                    
-                ],
-                "obsp": [
-                    
-                ],
-                "varp": [
-                    
-                ],
-                "uns": [
-                    
-                ]
-            }
-        ],
-        "timestamp": "2026-03-29T11:17:47.094134151",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
-        }
-    }
-}
diff --git a/modules/local/symphony/integrate/environment.yml b/modules/local/symphony/harmonyintegrate/environment.yml
similarity index 100%
rename from modules/local/symphony/integrate/environment.yml
rename to modules/local/symphony/harmonyintegrate/environment.yml
diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/harmonyintegrate/main.nf
similarity index 72%
rename from modules/local/symphony/integrate/main.nf
rename to modules/local/symphony/harmonyintegrate/main.nf
index 93a0e116..9a109b6a 100644
--- a/modules/local/symphony/integrate/main.nf
+++ b/modules/local/symphony/harmonyintegrate/main.nf
@@ -13,21 +13,23 @@ process SYMPHONY_HARMONYINTEGRATE {
     val(counts_layer)
 
     output:
-    tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
-    path "X_${prefix}.pkl"                 , emit: obsm
-    path "versions.yml"                    , emit: versions, topic: versions
+    tuple val(meta), path("${prefix}.h5ad")          , emit: h5ad
+    tuple val(meta), path("${prefix}_reference.h5ad"), emit: reference
+    path "X_${prefix}.pkl"                           , emit: obsm
+    path "versions.yml"                              , emit: versions, topic: versions
 
     script:
     prefix = task.ext.prefix ?: "${meta.id}"
     if ("${prefix}.h5ad" == "${h5ad}") {
         error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
     }
-    template('integrate.py')
+    template('harmonyintegrate.py')
 
     stub:
     prefix = task.ext.prefix ?: "${meta.id}"
     """
     touch ${prefix}.h5ad
+    touch ${prefix}_reference.h5ad
     touch X_${prefix}.pkl
     touch versions.yml
     """
diff --git a/modules/local/symphony/integrate/templates/integrate.py b/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py
similarity index 100%
rename from modules/local/symphony/integrate/templates/integrate.py
rename to modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py
diff --git a/modules/local/symphony/integrate/tests/main.nf.test b/modules/local/symphony/harmonyintegrate/tests/main.nf.test
similarity index 94%
rename from modules/local/symphony/integrate/tests/main.nf.test
rename to modules/local/symphony/harmonyintegrate/tests/main.nf.test
index ea5cd9d0..52dc354e 100644
--- a/modules/local/symphony/integrate/tests/main.nf.test
+++ b/modules/local/symphony/harmonyintegrate/tests/main.nf.test
@@ -1,7 +1,7 @@
 nextflow_process {
 
     name "Test Process SYMPHONY_HARMONYINTEGRATE"
-    script "modules/local/symphony/integrate/main.nf"
+    script "modules/local/symphony/harmonyintegrate/main.nf"
     process "SYMPHONY_HARMONYINTEGRATE"
 
     tag "modules"
@@ -30,6 +30,7 @@ nextflow_process {
             def adata = anndata(process.out.h5ad[0][1])
             assert process.success
             assert "X_emb" in adata.obsm
+            assert "X_pca_symphony" in adata.obsm
             assert "symphony" in adata.uns
             assert snapshot(
                 path(process.out.versions[0]).yaml,
diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap b/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap
similarity index 100%
rename from modules/local/symphony/integrate/tests/main.nf.test.snap
rename to modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap
diff --git a/nextflow_schema.json b/nextflow_schema.json
index bacbb557..be1d18d2 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -152,8 +152,8 @@
                     "type": "string",
                     "default": "scvi",
                     "description": "Specify the tool to use for integration",
-                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, symphony, bbknn, combat, seurat, scimilarity, pca, expimap",
-                    "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$"
+                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, bbknn, combat, seurat, scimilarity, pca, expimap",
+                    "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$"
                 },
                 "integration_hvgs": {
                     "type": "integer",
@@ -228,7 +228,7 @@
                     "type": "string",
                     "description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if `input` is not provided - otherwise it is ignored.",
                     "help_text": "If the `input` parameter is not provided (no new data to add), integration will not be performed. In order to be able to utilize existing integration results, you need to provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).",
-                    "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat))*)?$"
+                    "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|bbknn|combat|seurat))*)?$"
                 }
             }
         },
diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf
index b89d4f72..7af3e357 100644
--- a/subworkflows/local/integrate/main.nf
+++ b/subworkflows/local/integrate/main.nf
@@ -2,8 +2,8 @@ include { SCANPY_HVGS        } from '../../../modules/local/scanpy/hvgs'
 include { SCANPY_FILTER      } from '../../../modules/local/scanpy/filter'
 include { SCVITOOLS_SCVI     } from '../../../modules/local/scvitools/scvi'
 include { SCVITOOLS_SCANVI   } from '../../../modules/local/scvitools/scanvi'
-include { SCANPY_HARMONY     } from '../../../modules/local/scanpy/harmony'
-include { SYMPHONY_HARMONYINTEGRATE } from '../../../modules/local/symphony/integrate'
+include { SYMPHONY_HARMONYINTEGRATE } from '../../../modules/local/symphony/harmonyintegrate'
+include { SYMPHONY_MAPEMBEDDING      } from '../../../modules/local/symphony/mapembedding'
 include { SCANPY_BBKNN       } from '../../../modules/local/scanpy/bbknn'
 include { SCANPY_COMBAT      } from '../../../modules/local/scanpy/combat'
 include { SCANPY_PCA         } from '../../../modules/local/scanpy/pca'
@@ -24,10 +24,12 @@ workflow INTEGRATE {
     scvi_categorical_covariates // list of string
     scvi_continuous_covariates  // list of string
     scimilarity_model           // path
+    harmony_reference           // path
     expimap_gmt                 // path
     condition_col               // string
 
     main:
+    ch_versions = channel.empty()
     ch_obs = channel.empty()
     ch_var = channel.empty()
     ch_obsm = channel.empty()
@@ -41,6 +43,7 @@ workflow INTEGRATE {
             n_hvgs,
             excluded_genes
         )
+        ch_versions = ch_versions.mix(SCANPY_HVGS.out.versions)
         ch_h5ad_hvg = SCANPY_HVGS.out.h5ad
 
         // See issue 215
@@ -60,6 +63,7 @@ workflow INTEGRATE {
             []
         )
         ch_h5ad_hvg = SCANPY_FILTER.out.h5ad
+        ch_versions = ch_versions.mix(SCANPY_FILTER.out.versions)
     }
     else {
         ch_h5ad_hvg = ch_h5ad
@@ -69,6 +73,7 @@ workflow INTEGRATE {
         SEURAT_INTEGRATION (
             ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'seurat'], h5ad] }, "batch"
         )
+        ch_versions = ch_versions.mix(SEURAT_INTEGRATION.out.versions)
         ch_integrations = ch_integrations.mix(SEURAT_INTEGRATION.out.h5ad)
     }
 
@@ -83,6 +88,7 @@ workflow INTEGRATE {
             scvi_categorical_covariates,
             scvi_continuous_covariates,
         )
+        ch_versions = ch_versions.mix(SCVITOOLS_SCVI.out.versions)
         ch_integrations = ch_integrations.mix(SCVITOOLS_SCVI.out.h5ad)
         ch_obsm = ch_obsm.mix(SCVITOOLS_SCVI.out.obsm)
     }
@@ -101,30 +107,34 @@ workflow INTEGRATE {
             scvi_categorical_covariates,
             scvi_continuous_covariates,
         )
+        ch_versions = ch_versions.mix(SCVITOOLS_SCANVI.out.versions)
         ch_integrations = ch_integrations.mix(SCVITOOLS_SCANVI.out.h5ad)
         ch_obs = ch_obs.mix(SCVITOOLS_SCANVI.out.obs)
         ch_obsm = ch_obsm.mix(SCVITOOLS_SCANVI.out.obsm)
     }
 
     if (methods.contains('harmony')) {
-        SCANPY_HARMONY (
-            ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'harmony'], h5ad] },
-            "batch",
-            "X"
-        )
-        ch_integrations = ch_integrations.mix(SCANPY_HARMONY.out.h5ad)
-        ch_obsm = ch_obsm.mix(SCANPY_HARMONY.out.obsm)
-    }
-
-    if (methods.contains('symphony')) {
-        SYMPHONY_HARMONYINTEGRATE (
-            ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] },
-            "batch",
-            "X"
-        )
-        ch_versions = ch_versions.mix(SYMPHONY_HARMONYINTEGRATE.out.versions)
-        ch_integrations = ch_integrations.mix(SYMPHONY_HARMONYINTEGRATE.out.h5ad)
-        ch_obsm = ch_obsm.mix(SYMPHONY_HARMONYINTEGRATE.out.obsm)
+        if (harmony_reference) {
+            SYMPHONY_MAPEMBEDDING (
+                ch_h5ad.map { _meta, h5ad -> [[id: 'harmony'], h5ad] },
+                channel.value([[id: 'harmony'], harmony_reference]),
+                "batch",
+                "X"
+            )
+            ch_versions = ch_versions.mix(SYMPHONY_MAPEMBEDDING.out.versions)
+            ch_integrations = ch_integrations.mix(SYMPHONY_MAPEMBEDDING.out.h5ad)
+            ch_obsm = ch_obsm.mix(SYMPHONY_MAPEMBEDDING.out.obsm)
+        }
+        else {
+            SYMPHONY_HARMONYINTEGRATE (
+                ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'harmony'], h5ad] },
+                "batch",
+                "X"
+            )
+            ch_versions = ch_versions.mix(SYMPHONY_HARMONYINTEGRATE.out.versions)
+            ch_integrations = ch_integrations.mix(SYMPHONY_HARMONYINTEGRATE.out.h5ad)
+            ch_obsm = ch_obsm.mix(SYMPHONY_HARMONYINTEGRATE.out.obsm)
+        }
     }
 
     if (methods.contains('bbknn')) {
@@ -132,6 +142,7 @@ workflow INTEGRATE {
             ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'bbknn'], h5ad] },
             "batch"
         )
+        ch_versions = ch_versions.mix(SCANPY_BBKNN.out.versions)
         ch_integrations = ch_integrations.mix(SCANPY_BBKNN.out.h5ad)
     }
 
@@ -140,6 +151,7 @@ workflow INTEGRATE {
             ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'combat'], h5ad] },
             "batch"
         )
+        ch_versions = ch_versions.mix(SCANPY_COMBAT.out.versions)
         ch_integrations = ch_integrations.mix(SCANPY_COMBAT.out.h5ad)
         ch_obsm = ch_obsm.mix(SCANPY_COMBAT.out.obsm)
     }
@@ -149,6 +161,7 @@ workflow INTEGRATE {
             ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'pca'], h5ad] },
             "X_emb"
         )
+        ch_versions = ch_versions.mix(SCANPY_PCA.out.versions)
         ch_integrations = ch_integrations.mix(SCANPY_PCA.out.h5ad)
         ch_obsm = ch_obsm.mix(SCANPY_PCA.out.obsm)
     }
@@ -162,6 +175,7 @@ workflow INTEGRATE {
             condition_col,
             "X"
         )
+        ch_versions = ch_versions.mix(SCARCHES_EXPIMAP.out.versions)
         ch_integrations = ch_integrations.mix(SCARCHES_EXPIMAP.out.h5ad)
         ch_obsm = ch_obsm.mix(SCARCHES_EXPIMAP.out.obsm)
     }
@@ -171,6 +185,7 @@ workflow INTEGRATE {
             ch_h5ad.map { _meta, h5ad -> [[id: 'scimilarity'], h5ad] },
             scimilarity_model,
         )
+        ch_versions = ch_versions.mix(SCIMILARITY.out.versions)
         ch_integrations = ch_integrations.mix(SCIMILARITY.out.integrations)
         ch_obs = ch_obs.mix(SCIMILARITY.out.obs)
         ch_obsm = ch_obsm.mix(SCIMILARITY.out.obsm)
@@ -181,4 +196,5 @@ workflow INTEGRATE {
     obs          = ch_obs // channel: [ pkl ]
     var          = ch_var // channel: [ pkl ]
     obsm         = ch_obsm // channel: [ pkl ]
+    versions     = ch_versions // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test
index 60cb1fd2..0a59e5fb 100644
--- a/subworkflows/local/integrate/tests/main.nf.test
+++ b/subworkflows/local/integrate/tests/main.nf.test
@@ -74,81 +74,8 @@ nextflow_workflow {
             def adata = anndata(workflow.out.integrations[0][1])
             assert workflow.success
             assert "X_emb" in adata.obsm
-            assert snapshot(
-                workflow.out.versions,
-                adata.yaml
-            ).match()
-        }
-
-    }
-
-    test("Should run without failures - symphony - stub") {
-
-        options '-stub'
-
-        when {
-            params {
-                outdir = "$outputDir"
-            }
-            workflow {
-                """
-                input[0] = channel.of([
-                    [id: 'test'],
-                    file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true)
-                ])
-                input[1] = false
-                input[2] = 2000
-                input[3] = []
-                input[4] = ['symphony']
-                input[5] = null
-                input[6] = null
-                input[7] = []
-                input[8] = []
-                input[9] = null
-                input[10] = null
-                input[11] = 'condition'
-                """
-            }
-        }
-
-        then {
-            assert workflow.success
-            assert snapshot(workflow.out).match()
-        }
-
-    }
-
-    test("Should run without failures - symphony") {
-
-        when {
-            params {
-                outdir = "$outputDir"
-            }
-            workflow {
-                """
-                input[0] = channel.of([
-                    [id: 'test'],
-                    file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true)
-                ])
-                input[1] = false
-                input[2] = 2000
-                input[3] = []
-                input[4] = ['symphony']
-                input[5] = null
-                input[6] = null
-                input[7] = []
-                input[8] = []
-                input[9] = null
-                input[10] = null
-                input[11] = 'condition'
-                """
-            }
-        }
-
-        then {
-            def adata = anndata(workflow.out.integrations[0][1])
-            assert workflow.success
-            assert "X_emb" in adata.obsm
+            assert "X_pca_symphony" in adata.obsm
+            assert "symphony" in adata.uns
             assert snapshot(
                 workflow.out.versions,
                 adata.yaml
diff --git a/subworkflows/local/integrate/tests/main.nf.test.snap b/subworkflows/local/integrate/tests/main.nf.test.snap
index c3a97012..17d238af 100644
--- a/subworkflows/local/integrate/tests/main.nf.test.snap
+++ b/subworkflows/local/integrate/tests/main.nf.test.snap
@@ -19,51 +19,6 @@
                 "3": [
                     "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
-                "integrations": [
-                    [
-                        {
-                            "id": "harmony"
-                        },
-                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "obs": [
-                    
-                ],
-                "obsm": [
-                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "var": [
-                    
-                ]
-            }
-        ],
-        "timestamp": "2026-05-20T20:47:09.819743733",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
-        }
-    },
-    "Should run without failures - symphony - stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "symphony"
-                        },
-                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "2": [
-                    
-                ],
-                "3": [
-                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
                 "4": [
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
@@ -72,16 +27,16 @@
                 "integrations": [
                     [
                         {
-                            "id": "symphony"
+                            "id": "harmony"
                         },
-                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "obs": [
                     
                 ],
                 "obsm": [
-                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "var": [
                     
@@ -93,10 +48,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-19T11:01:29.131789506",
+        "timestamp": "2026-03-28T23:05:37.694952307",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
+            "nextflow": "25.10.2"
         }
     },
     "Should run without failures - bbknn - stub": {
@@ -118,6 +73,11 @@
                 ],
                 "3": [
                     
+                ],
+                "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "integrations": [
                     [
@@ -135,21 +95,30 @@
                 ],
                 "var": [
                     
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:49:35.863158377",
+        "timestamp": "2026-03-25T15:47:40.215608271",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
+            "nextflow": "25.10.2"
         }
     },
     "Should run without failures - combat": {
         "content": [
-            null,
+            [
+                "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae",
+                "versions.yml:md5,a6c1e0a77e0d31423a9d77edba85127d",
+                "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823"
+            ],
             {
                 "n_obs": 12940,
-                "n_vars": 2077,
+                "n_vars": 2000,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -209,7 +178,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:52:30.761335469",
+        "timestamp": "2026-05-28T14:03:08.524818368",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
@@ -235,6 +204,11 @@
                 "3": [
                     "X_pca_pca.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
+                "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
                 "integrations": [
                     [
                         {
@@ -251,13 +225,18 @@
                 ],
                 "var": [
                     
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:53:05.651566126",
+        "timestamp": "2026-04-10T16:50:54.506012382",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
+            "nextflow": "25.10.2"
         }
     },
     "Should run without failures - extension mode - stub": {
@@ -280,6 +259,9 @@
                 "3": [
                     "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
+                "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
                 "integrations": [
                     [
                         {
@@ -296,21 +278,28 @@
                 ],
                 "var": [
                     
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:52:48.773987405",
+        "timestamp": "2026-03-28T23:09:49.392744851",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
+            "nextflow": "25.10.2"
         }
     },
     "Should run without failures - pca": {
         "content": [
-            null,
+            [
+                "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae",
+                "versions.yml:md5,87a2cb96724430656d9c1276e91e0208",
+                "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823"
+            ],
             {
                 "n_obs": 12940,
-                "n_vars": 2077,
+                "n_vars": 2000,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -368,7 +357,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:53:47.047398518",
+        "timestamp": "2026-05-28T14:04:36.10115423",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
@@ -394,6 +383,11 @@
                 "3": [
                     "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
+                "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
                 "integrations": [
                     [
                         {
@@ -410,21 +404,30 @@
                 ],
                 "var": [
                     
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:51:21.560122268",
+        "timestamp": "2026-03-25T15:49:26.334091777",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
+            "nextflow": "25.10.2"
         }
     },
     "Should run without failures - harmony": {
         "content": [
-            null,
+            [
+                "versions.yml:md5,0941a4daea5c41d9e3259be11e9f2263",
+                "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae",
+                "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823"
+            ],
             {
                 "n_obs": 12940,
-                "n_vars": 2077,
+                "n_vars": 2000,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -464,7 +467,8 @@
                     "counts"
                 ],
                 "obsm": [
-                    "X_emb"
+                    "X_emb",
+                    "X_symphony"
                 ],
                 "varm": [
                     
@@ -481,7 +485,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:48:46.532961357",
+        "timestamp": "2026-05-28T14:18:52.042984469",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
@@ -489,10 +493,14 @@
     },
     "Should run without failures - bbknn": {
         "content": [
-            null,
+            [
+                "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae",
+                "versions.yml:md5,ccf730637c4c61a84ac4a002bf9832e0",
+                "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823"
+            ],
             {
                 "n_obs": 12940,
-                "n_vars": 2077,
+                "n_vars": 2000,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -552,7 +560,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:51:00.345519561",
+        "timestamp": "2026-05-28T14:01:44.359301169",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
@@ -560,7 +568,9 @@
     },
     "Should run without failures - expimap": {
         "content": [
-            null,
+            [
+                "versions.yml:md5,44d9a1bbfabdc0ecae8adc586c7c1b2d"
+            ],
             {
                 "n_obs": 12940,
                 "n_vars": 9887,
@@ -596,10 +606,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:55:15.288341932",
+        "timestamp": "2026-04-11T15:55:42.640248171",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
+            "nextflow": "25.10.4"
         }
     },
     "Should run without failures - expimap - stub": {
@@ -622,6 +632,9 @@
                 "3": [
                     "X_expimap.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
+                "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
                 "integrations": [
                     [
                         {
@@ -638,13 +651,16 @@
                 ],
                 "var": [
                     
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-05-20T20:54:13.304991035",
+        "timestamp": "2026-04-11T09:14:13.34716941",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "26.04.0"
+            "nextflow": "25.10.2"
         }
     }
 }
\ No newline at end of file

From 23109e826a27d99ebe51edea34b6237e86e80bdc Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Thu, 28 May 2026 14:45:44 +0200
Subject: [PATCH 05/19] Add Symphony reference mapping support

---
 .gitignore                                    |  3 +
 conf/modules.config                           | 16 ++++
 contrib/nf-core-test-datasets/.gitattributes  |  1 +
 contrib/nf-core-test-datasets/README.md       | 49 ++++++++++
 .../nf-core-test-datasets/build.params.json   |  8 ++
 .../collect-artifacts.sh                      | 15 +++
 docs/output.md                                |  1 +
 docs/reproducibility.md                       | 39 ++++----
 docs/usage.md                                 | 12 ++-
 main.nf                                       |  7 ++
 modules/local/scanpy/hvgs/templates/hvgs.py   |  1 +
 .../local/scanpy/hvgs/tests/main.nf.test.snap | 44 ++++-----
 modules/local/scanpy/pca/templates/pca.py     |  3 +-
 .../local/scanpy/pca/tests/main.nf.test.snap  | 17 ++--
 .../templates/harmonyintegrate.py             | 75 ++++++++++-----
 .../harmonyintegrate/tests/main.nf.test       | 13 ++-
 .../harmonyintegrate/tests/main.nf.test.snap  | 67 +++++++++++--
 .../symphony/mapembedding/environment.yml     | 10 ++
 modules/local/symphony/mapembedding/main.nf   | 35 +++++++
 .../mapembedding/templates/map_embedding.py   | 59 ++++++++++++
 .../symphony/mapembedding/tests/main.nf.test  | 93 +++++++++++++++++++
 .../mapembedding/tests/main.nf.test.snap      | 93 +++++++++++++++++++
 nextflow.config                               |  1 +
 nextflow_schema.json                          |  8 ++
 subworkflows/local/combine/main.nf            |  2 +
 .../local/integrate/tests/main.nf.test        | 36 ++++---
 .../main.nf                                   |  8 +-
 tests/main_pipeline_extend.nf.test            |  5 +-
 tests/main_pipeline_extend.nf.test.snap       | 21 ++++-
 tests/main_pipeline_reference_mapping.nf.test |  5 +-
 ...in_pipeline_reference_mapping.nf.test.snap | 21 ++++-
 workflows/scdownstream.nf                     |  2 +
 32 files changed, 656 insertions(+), 114 deletions(-)
 create mode 100644 contrib/nf-core-test-datasets/.gitattributes
 create mode 100644 contrib/nf-core-test-datasets/README.md
 create mode 100644 contrib/nf-core-test-datasets/build.params.json
 create mode 100755 contrib/nf-core-test-datasets/collect-artifacts.sh
 create mode 100644 modules/local/symphony/mapembedding/environment.yml
 create mode 100644 modules/local/symphony/mapembedding/main.nf
 create mode 100644 modules/local/symphony/mapembedding/templates/map_embedding.py
 create mode 100644 modules/local/symphony/mapembedding/tests/main.nf.test
 create mode 100644 modules/local/symphony/mapembedding/tests/main.nf.test.snap

diff --git a/.gitignore b/.gitignore
index 5516a066..7c60a3c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ testing*
 null/
 .lineage/
 .nf-test*
+tests/assets/*.h5ad
+contrib/nf-core-test-datasets/build_output/
+contrib/nf-core-test-datasets/extension_base/
diff --git a/conf/modules.config b/conf/modules.config
index 41dacb79..da4a96cb 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -380,6 +380,22 @@ process {
     }
 
     withName: SYMPHONY_HARMONYINTEGRATE {
+        publishDir = [
+            path: { "${params.outdir}/combine/integrate/harmony" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename ->
+                if (filename.endsWith('_reference.h5ad')) {
+                    return 'harmony_reference.h5ad'
+                }
+                if (params.save_intermediates && !filename.equals('versions.yml')) {
+                    return filename
+                }
+                return null
+            },
+        ]
+    }
+
+    withName: SYMPHONY_MAPEMBEDDING {
         publishDir = [
             path: { "${params.outdir}/combine/integrate/harmony" },
             mode: params.publish_dir_mode,
diff --git a/contrib/nf-core-test-datasets/.gitattributes b/contrib/nf-core-test-datasets/.gitattributes
new file mode 100644
index 00000000..914d733f
--- /dev/null
+++ b/contrib/nf-core-test-datasets/.gitattributes
@@ -0,0 +1 @@
+extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text
diff --git a/contrib/nf-core-test-datasets/README.md b/contrib/nf-core-test-datasets/README.md
new file mode 100644
index 00000000..b9b58a05
--- /dev/null
+++ b/contrib/nf-core-test-datasets/README.md
@@ -0,0 +1,49 @@
+# nf-core/test-datasets update — `scdownstream/extension_base`
+
+Copy the contents of `extension_base/` into the **`scdownstream` branch** of [nf-core/test-datasets](https://github.com/nf-core/test-datasets), replacing the existing files in `scdownstream/extension_base/`.
+
+## Files
+
+| File                                    | Purpose                                                      |
+| --------------------------------------- | ------------------------------------------------------------ |
+| `extension_base/model.pt`               | scVI checkpoint for reference mapping / extension            |
+| `extension_base/merged.h5ad`            | Finalized atlas (`base_adata`) for extension                 |
+| `extension_base/harmony_reference.h5ad` | Symphony reference for Harmony reference mapping / extension |
+
+All three must come from the **same pipeline run** (see below).
+
+> **Note:** If `extension_base/` already contains files but you have not yet run `collect-artifacts.sh`, they may be a mismatched set: `merged.h5ad` and `model.pt` may still be the current test-datasets versions, while `harmony_reference.h5ad` may come from a separate Harmony-only build. **Do not open the test-datasets PR until you have run a unified build and `collect-artifacts.sh`** — all three files must be replaced together.
+
+## How these were generated
+
+```bash
+# From the scdownstream repo root, with nf-core conda env active:
+nextflow run main.nf -profile test,apptainer -params-file contrib/nf-core-test-datasets/build.params.json
+
+# Populate extension_base/ from the build output:
+./contrib/nf-core-test-datasets/collect-artifacts.sh
+```
+
+Build parameters match the consolidated pipeline tests (`main_pipeline_reference_mapping.nf.test`, `main_pipeline_extend.nf.test`):
+
+- Input: `samplesheet.csv` (full atlas)
+- Integration: `scvi,harmony`
+- HVGs: 500
+
+## PR checklist (test-datasets repo)
+
+1. Check out branch `scdownstream`.
+2. Ensure Git LFS is enabled (`git lfs install`).
+3. Copy `extension_base/*` into `scdownstream/extension_base/` (overwrite `model.pt` and `merged.h5ad`, add `harmony_reference.h5ad`).
+4. Add or extend `.gitattributes` on the test-datasets repo:
+
+   ```
+   scdownstream/extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text
+   ```
+
+5. Commit and open PR against `scdownstream`.
+6. After merge, re-run `nftu` on the scdownstream pipeline reference-mapping and extend tests.
+
+## Pipeline version
+
+In your PR description, record the scdownstream commit/tag that was used to generate these files.
diff --git a/contrib/nf-core-test-datasets/build.params.json b/contrib/nf-core-test-datasets/build.params.json
new file mode 100644
index 00000000..8faa4a16
--- /dev/null
+++ b/contrib/nf-core-test-datasets/build.params.json
@@ -0,0 +1,8 @@
+{
+    "input": "https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet.csv",
+    "integration_methods": "scvi,harmony",
+    "integration_hvgs": 500,
+    "doublet_detection": "scrublet,scdblfinder",
+    "celltypist_model": "Adult_COVID19_PBMC",
+    "outdir": "contrib/nf-core-test-datasets/build_output"
+}
diff --git a/contrib/nf-core-test-datasets/collect-artifacts.sh b/contrib/nf-core-test-datasets/collect-artifacts.sh
new file mode 100755
index 00000000..75fa3d4e
--- /dev/null
+++ b/contrib/nf-core-test-datasets/collect-artifacts.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+OUT="${ROOT}/contrib/nf-core-test-datasets/build_output"
+DEST="${ROOT}/contrib/nf-core-test-datasets/extension_base"
+
+mkdir -p "${DEST}"
+
+cp "${OUT}/combine/integrate/scvi/scvi_model/model.pt" "${DEST}/model.pt"
+cp "${OUT}/finalized/merged.h5ad" "${DEST}/merged.h5ad"
+cp "${OUT}/combine/integrate/harmony/harmony_reference.h5ad" "${DEST}/harmony_reference.h5ad"
+
+echo "Collected artifacts into ${DEST}:"
+ls -lh "${DEST}"
diff --git a/docs/output.md b/docs/output.md
index ca948532..38d7356b 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -98,6 +98,7 @@ The `preprocess` directory contains a subdirectory for each sample, which contai
     - `${tool}`
       - `*.h5ad/*.rds`: The integrated H5AD or RDS file.
       - `X_${tool}.pkl`: Low-dimensional representation of the integrated data.
+      - `harmony_reference.h5ad` (Harmony only): Compact Symphony reference AnnData for query mapping, published from de novo Harmony runs.
 
 </details>
 
diff --git a/docs/reproducibility.md b/docs/reproducibility.md
index 814322dc..78769582 100644
--- a/docs/reproducibility.md
+++ b/docs/reproducibility.md
@@ -120,28 +120,29 @@ The **Test strategy (this branch)** column describes what the tests on this bran
 
 ### `scanpy/`
 
-| Module                   | Description                                                                                                                                                                                                                                                                                                                  | Reproducibility                                                                                                                                                              | Test strategy (this branch)                                                                                           |
-| ------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `scanpy/bbknn`           | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding.                                                                                                                                                                                                                                            | Fully deterministic — kNN construction is deterministic given the input embedding.                                                                                           | structural — versions + schema only                                                                                   |
-| `scanpy/cellcycle`       | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase.                                                                                                                                                                                                                                | Fully deterministic                                                                                                                                                          | hash                                                                                                                  |
-| `scanpy/combat`          | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`.                                                                                                                                                                                                                                            | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends.                                                               | structural — versions + schema only                                                                                   |
-| `scanpy/filter`          | Filters cells and genes by count, gene, and mitochondrial percentage thresholds.                                                                                                                                                                                                                                             | Fully deterministic                                                                                                                                                          | hash + structural — standard `hash` triple; multiple parameter scenarios                                              |
-| `scanpy/hvgs`            | Selects highly variable genes and subsets the AnnData to those genes.                                                                                                                                                                                                                                                        | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions.    | structural — versions + schema only                                                                                   |
-| `scanpy/leiden`          | Performs Leiden community-detection clustering at a specified resolution.                                                                                                                                                                                                                                                    | **Non-deterministic** — Leiden uses random restarts with no fixed seed.                                                                                                      | structural — range assertion on cluster count + versions + schema                                                     |
-| `scanpy/neighbors`       | Computes a k-nearest-neighbour graph on a specified embedding.                                                                                                                                                                                                                                                               | Fully deterministic given a fixed input embedding.                                                                                                                           | structural — versions + schema only                                                                                   |
-| `scanpy/paga`            | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot.                                                                                                                                                                                                                                                | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels.                                                                            | hash                                                                                                                  |
-| `scanpy/pca`             | Runs PCA with `random_state=0` and stores the result under a specified key.                                                                                                                                                                                                                                                  | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends.                                                                   | structural — versions + schema only                                                                                   |
-| `scanpy/plotqc`          | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC.                                                                                                                                                                                                                                               | Fully deterministic                                                                                                                                                          | hash (no H5AD output — PNG / MultiQC JSON + versions)                                                                 |
-| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method.                                                                                                                                                                                                               | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions. | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only**                        |
-| `scanpy/readh5`          | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD.                                                                                                                                                                                                                                                     | Fully deterministic                                                                                                                                                          | hash                                                                                                                  |
-| `scanpy/sample`          | Down-samples cells to a fixed count or fraction using `rng=0`.                                                                                                                                                                                                                                                               | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions.                                                                           | hash                                                                                                                  |
-| `scanpy/umap`            | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`.                                                                                                                                                                                                                                           | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions.                                                                   | structural — versions + schema only                                                                                   |
+| Module                   | Description                                                                                                                                             | Reproducibility                                                                                                                                                                                  | Test strategy (this branch)                                                                    |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------- |
+| `scanpy/bbknn`           | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding.                                                                       | Fully deterministic — kNN construction is deterministic given the input embedding.                                                                                                               | structural — versions + schema only                                                            |
+| `scanpy/cellcycle`       | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase.                                                           | Fully deterministic                                                                                                                                                                              | hash                                                                                           |
+| `scanpy/combat`          | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`.                                                                       | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends.                                                                                   | structural — versions + schema only                                                            |
+| `scanpy/filter`          | Filters cells and genes by count, gene, and mitochondrial percentage thresholds.                                                                        | Fully deterministic                                                                                                                                                                              | hash + structural — standard `hash` triple; multiple parameter scenarios                       |
+| `scanpy/hvgs`            | Normalizes counts (`normalize_total` → `log1p`), selects highly variable genes, and subsets the AnnData to those genes while keeping raw counts in `X`. | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions.                        | structural — versions + schema only                                                            |
+| `scanpy/leiden`          | Performs Leiden community-detection clustering at a specified resolution.                                                                               | **Non-deterministic** — Leiden uses random restarts with no fixed seed.                                                                                                                          | structural — range assertion on cluster count + versions + schema                              |
+| `scanpy/neighbors`       | Computes a k-nearest-neighbour graph on a specified embedding.                                                                                          | Fully deterministic given a fixed input embedding.                                                                                                                                               | structural — versions + schema only                                                            |
+| `scanpy/paga`            | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot.                                                                           | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels.                                                                                                | hash                                                                                           |
+| `scanpy/pca`             | Runs library-size normalization, log1p, and PCA with `random_state=0`, storing the result under a specified key.                                        | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends. Embeddings differ from earlier pipeline versions that ran PCA on unnormalized counts. | structural — versions + schema only                                                            |
+| `scanpy/plotqc`          | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC.                                                                          | Fully deterministic                                                                                                                                                                              | hash (no H5AD output — PNG / MultiQC JSON + versions)                                          |
+| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method.                                          | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions.                     | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only** |
+| `scanpy/readh5`          | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD.                                                                                | Fully deterministic                                                                                                                                                                              | hash                                                                                           |
+| `scanpy/sample`          | Down-samples cells to a fixed count or fraction using `rng=0`.                                                                                          | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions.                                                                                               | hash                                                                                           |
+| `scanpy/umap`            | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`.                                                                      | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions.                                                                                       | structural — versions + schema only                                                            |
 
 ### `symphony/`
 
-| Module                         | Description                                                                                                                                                                                                                                                                                                                  | Reproducibility                                                                                                                                   | Test strategy (this branch)         |
-| ------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- |
-| `symphony/harmonyintegrate`    | Runs Harmony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded.                     | structural — versions + schema only |
+| Module                      | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | Reproducibility                                                                                                               | Test strategy (this branch)         |
+| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- |
+| `symphony/harmonyintegrate` | Runs Harmony batch integration via symphonypy after normalize_total → log1p → scale(max_value=10) → PCA(zero_center=False), storing `X_symphony`, `X_emb`, Symphony reference metadata (`var` mean/std/HVG, `varm['PCs']`, `uns['harmony']`, `uns['normalize']`), and publishing a compact `harmony_reference.h5ad`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only |
+| `symphony/mapembedding`     | Maps query cells onto a Symphony reference via symphonypy `map_embedding`, storing mapped coordinates in `X_symphony` and `X_emb`.                                                                                                                                                                                                                                                                                                                                                  | **Non-deterministic** — inherits Harmony mapping variability.                                                                 | structural — versions + schema only |
 
 ### `scimilarity/`
 
diff --git a/docs/usage.md b/docs/usage.md
index 50ad2c88..6a387656 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -216,14 +216,16 @@ nextflow run nf-core/scdownstream --input samplesheet.csv --outdir results \
 ### Reference mapping and extension
 
 **Reference mapping** means **mapping new cells into a latent space using a pre-trained model** instead of training that integration step only on the query data.
-In this pipeline this can be done using **scVI**, **scANVI**, and **scimilarity**.
-To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, and/or `scimilarity`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), and [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text).
+In this pipeline this can be done using **scVI**, **scANVI**, **scimilarity**, and **Harmony (Symphony)**.
+To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, `scimilarity`, and/or `harmony`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), and [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text).
+
+For Harmony reference mapping, set [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference) to the compact Symphony reference AnnData written by a prior de novo Harmony run (`{outdir}/combine/integrate/harmony/harmony_reference.h5ad`). This file contains the gene statistics, PCA loadings, Harmony centroids, and normalization metadata required for query mapping.
 
 **Extension** is for users that have outputs of a previous run of `nf-core/scdownstream` and want to extend it with new data, without re-running the integration from scratch.
-It only works if `scvi`, `scanvi` and/or `scimilarity` have been enabled in `integration_methods` in the original pipeline run.
-Other integration methods than the three mentioned before are not supported for this.
+It only works if `scvi`, `scanvi`, `scimilarity`, and/or `harmony` have been enabled in `integration_methods` in the original pipeline run.
+Integration methods other than these four are not supported for extension.
 In simple terms, in this setup the workflow is: (1) project new data into the latent space learned from the data in the original run, and then (2) combine the datasets.
-For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model)).
+For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference)).
 For (2), pass the integrated `.h5ad` from the original run as [`base_adata`](https://nf-co.re/scdownstream/parameters#base_adata).
 
 Pre-trained scVI models are also shared on [scvi-hub](https://huggingface.co/scvi-tools).
diff --git a/main.nf b/main.nf
index b99f8488..7aa62964 100644
--- a/main.nf
+++ b/main.nf
@@ -61,6 +61,7 @@ workflow NFCORE_SCDOWNSTREAM {
     scvi_categorical_covariates   //   value: string
     scvi_continuous_covariates    //   value: string
     scimilarity_model             //   value: string
+    harmony_reference             //   value: file or null
     expimap_gmt                   //   value: string
     skip_liana                    //   value: boolean
     skip_rankgenesgroups          //   value: boolean
@@ -117,6 +118,7 @@ workflow NFCORE_SCDOWNSTREAM {
         scvi_categorical_covariates,
         scvi_continuous_covariates,
         scimilarity_model,
+        harmony_reference,
         expimap_gmt,
         skip_liana,
         skip_rankgenesgroups,
@@ -180,6 +182,10 @@ workflow {
 
     def analysis_plan = analysisPlanToList()
 
+    def harmony_reference = params.harmony_reference
+        ? file(params.harmony_reference, checkIfExists: true)
+        : null
+
     NFCORE_SCDOWNSTREAM (
         PIPELINE_INITIALISATION.out.samplesheet,
         ch_base_adata,
@@ -211,6 +217,7 @@ workflow {
         params.scvi_categorical_covariates,
         params.scvi_continuous_covariates,
         params.scimilarity_model,
+        harmony_reference,
         params.expimap_gmt,
         params.skip_liana,
         params.skip_rankgenesgroups,
diff --git a/modules/local/scanpy/hvgs/templates/hvgs.py b/modules/local/scanpy/hvgs/templates/hvgs.py
index 70e43873..9e8bf0a1 100644
--- a/modules/local/scanpy/hvgs/templates/hvgs.py
+++ b/modules/local/scanpy/hvgs/templates/hvgs.py
@@ -41,6 +41,7 @@
 
     raw_counts = adata.X.copy()
 
+    sc.pp.normalize_total(adata)
     sc.pp.log1p(adata)
     sc.pp.highly_variable_genes(adata, **kwargs)
 
diff --git a/modules/local/scanpy/hvgs/tests/main.nf.test.snap b/modules/local/scanpy/hvgs/tests/main.nf.test.snap
index 0fb740d0..e8a736f2 100644
--- a/modules/local/scanpy/hvgs/tests/main.nf.test.snap
+++ b/modules/local/scanpy/hvgs/tests/main.nf.test.snap
@@ -9,7 +9,7 @@
             },
             {
                 "n_obs": 38234,
-                "n_vars": 100,
+                "n_vars": 101,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -26,19 +26,19 @@
                     ]
                 },
                 "layers": [
-
+                    
                 ],
                 "obsm": [
-
+                    
                 ],
                 "varm": [
-
+                    
                 ],
                 "obsp": [
-
+                    
                 ],
                 "varp": [
-
+                    
                 ],
                 "uns": [
                     "hvg",
@@ -46,10 +46,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-03-29T11:18:05.314404083",
+        "timestamp": "2026-05-28T12:02:56.794195774",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
+            "nextflow": "26.04.0"
         }
     },
     "Should run without a specified number of HVGs": {
@@ -62,7 +62,7 @@
             },
             {
                 "n_obs": 38234,
-                "n_vars": 251,
+                "n_vars": 111,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -79,19 +79,19 @@
                     ]
                 },
                 "layers": [
-
+                    
                 ],
                 "obsm": [
-
+                    
                 ],
                 "varm": [
-
+                    
                 ],
                 "obsp": [
-
+                    
                 ],
                 "varp": [
-
+                    
                 ],
                 "uns": [
                     "hvg",
@@ -99,10 +99,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-03-29T11:17:05.806436168",
+        "timestamp": "2026-05-28T12:02:32.511106972",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
+            "nextflow": "26.04.0"
         }
     },
     "Should run without failures - stub": {
@@ -171,19 +171,19 @@
                     ]
                 },
                 "layers": [
-
+                    
                 ],
                 "obsm": [
-
+                    
                 ],
                 "varm": [
-
+                    
                 ],
                 "obsp": [
-
+                    
                 ],
                 "varp": [
-
+                    
                 ],
                 "uns": [
                     "hvg",
@@ -197,4 +197,4 @@
             "nextflow": "25.10.2"
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/local/scanpy/pca/templates/pca.py b/modules/local/scanpy/pca/templates/pca.py
index 666ab4cb..b3554d0a 100644
--- a/modules/local/scanpy/pca/templates/pca.py
+++ b/modules/local/scanpy/pca/templates/pca.py
@@ -21,7 +21,8 @@
 prefix = "${prefix}"
 key_added = "${key_added}"
 
-# Run PCA
+sc.pp.normalize_total(adata)
+sc.pp.log1p(adata)
 sc.pp.pca(adata, random_state=0, key_added=key_added)
 
 adata.write_h5ad(f"{prefix}.h5ad")
diff --git a/modules/local/scanpy/pca/tests/main.nf.test.snap b/modules/local/scanpy/pca/tests/main.nf.test.snap
index dd729e71..1ae72069 100644
--- a/modules/local/scanpy/pca/tests/main.nf.test.snap
+++ b/modules/local/scanpy/pca/tests/main.nf.test.snap
@@ -59,11 +59,11 @@
                 "var": {
                     "index": "_index",
                     "columns": [
-
+                        
                     ]
                 },
                 "layers": [
-
+                    
                 ],
                 "obsm": [
                     "X_pca"
@@ -72,20 +72,21 @@
                     "X_pca"
                 ],
                 "obsp": [
-
+                    
                 ],
                 "varp": [
-
+                    
                 ],
                 "uns": [
-                    "X_pca"
+                    "X_pca",
+                    "log1p"
                 ]
             }
         ],
-        "timestamp": "2026-03-29T11:17:21.253081099",
+        "timestamp": "2026-05-28T12:10:47.461951809",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
+            "nextflow": "26.04.0"
         }
     }
-}
+}
\ No newline at end of file
diff --git a/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py b/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py
index 32de73b5..1afb31d4 100644
--- a/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py
+++ b/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py
@@ -1,53 +1,80 @@
 #!/usr/bin/env python3
 
-# Disable OpenMP CPU topology detection for MacOS compatibility
 import os
+
 os.environ["KMP_AFFINITY"] = "disabled"
+os.environ["MPLCONFIGDIR"] = "./tmp/mpl"
+os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
 
 import importlib.metadata
 import platform
-import yaml
-
-os.environ["MPLCONFIGDIR"] = "./tmp/mpl"
-os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
 
+import numpy as np
+import pandas as pd
 import scanpy as sc
 import symphonypy as sp
-import pandas as pd
-
+import yaml
+from anndata import AnnData
+from scipy.sparse import csr_matrix
 from threadpoolctl import threadpool_limits
+
+
+def build_reference(adata, target_sum):
+    harmony = adata.uns["harmony"]
+    return AnnData(
+        X=csr_matrix((0, adata.n_vars), dtype=np.float32),
+        var=adata.var[["mean", "std", "highly_variable"]].copy(),
+        varm={"PCs": adata.varm["PCs"].copy()},
+        uns={
+            "harmony": {
+                "Nr": harmony["Nr"],
+                "C": harmony["C"],
+                "K": harmony["K"],
+                "sigma": harmony.get("sigma"),
+                "ref_basis_loadings": harmony["ref_basis_loadings"],
+            },
+            "normalize": {"target_sum": target_sum},
+        },
+    )
+
+
 threadpool_limits(int("${task.cpus}"))
 
 adata = sc.read_h5ad("${h5ad}")
-
+adata_proc = adata.copy()
 prefix = "${prefix}"
+batch_col = "${batch_col}"
+counts_layer = "${counts_layer}"
 
-adata_processing = adata.copy()
-
-if "${counts_layer}" != "X":
-    adata_processing.X = adata.layers["${counts_layer}"]
+if counts_layer != "X":
+    adata_proc.X = adata_proc.layers[counts_layer]
 
-sc.pp.log1p(adata_processing)
-sc.pp.pca(adata_processing)
+target_sum = float(np.median(np.asarray(adata_proc.X.sum(axis=1)).ravel()))
+sc.pp.normalize_total(adata_proc, target_sum=target_sum)
+sc.pp.log1p(adata_proc)
+sc.pp.scale(adata_proc, max_value=10)
+sc.pp.pca(adata_proc, n_comps=30, zero_center=False)
+if "highly_variable" not in adata_proc.var.columns:
+    adata_proc.var["highly_variable"] = True
 
 sp.pp.harmony_integrate(
-    adata_processing,
-    key="${batch_col}",
+    adata_proc,
+    key=batch_col,
     flavor="python",
     ref_basis_source="X_pca",
-    ref_basis_adjusted="X_pca_symphony",
+    ref_basis_adjusted="X_symphony",
 )
 
-adata.obsm["X_pca_symphony"] = adata_processing.obsm["X_pca_symphony"]
-adata.obsm["X_emb"] = adata_processing.obsm["X_pca_symphony"]
-adata.uns["symphony"] = adata_processing.uns["harmony"]
+adata_proc.uns["symphony"] = adata_proc.uns["harmony"]
+adata_proc.uns["normalize"] = {"target_sum": target_sum}
 
-adata.write_h5ad(f"{prefix}.h5ad")
+build_reference(adata_proc, target_sum).write_h5ad(f"{prefix}_reference.h5ad")
 
-df = pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names)
-df.to_pickle(f"X_{prefix}.pkl")
+adata.obsm["X_symphony"] = adata_proc.obsm["X_symphony"]
+adata.obsm["X_emb"] = adata_proc.obsm["X_symphony"]
+adata.write_h5ad(f"{prefix}.h5ad")
 
-# Versions
+pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names).to_pickle(f"X_{prefix}.pkl")
 
 versions = {
     "${task.process}": {
diff --git a/modules/local/symphony/harmonyintegrate/tests/main.nf.test b/modules/local/symphony/harmonyintegrate/tests/main.nf.test
index 52dc354e..47825dc4 100644
--- a/modules/local/symphony/harmonyintegrate/tests/main.nf.test
+++ b/modules/local/symphony/harmonyintegrate/tests/main.nf.test
@@ -28,13 +28,20 @@ nextflow_process {
 
         then {
             def adata = anndata(process.out.h5ad[0][1])
+            def reference = anndata(process.out.reference[0][1])
             assert process.success
             assert "X_emb" in adata.obsm
-            assert "X_pca_symphony" in adata.obsm
-            assert "symphony" in adata.uns
+            assert "harmony" in reference.uns
+            assert "normalize" in reference.uns
+            assert "mean" in reference.var.colnames
+            assert "std" in reference.var.colnames
+            assert "highly_variable" in reference.var.colnames
+            assert "PCs" in reference.varm
+            assert reference.n_obs == 0
             assert snapshot(
                 path(process.out.versions[0]).yaml,
-                adata.yaml
+                adata.yaml,
+                reference.yaml
             ).match()
         }
 
diff --git a/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap b/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap
index 6b03b90a..06b31468 100644
--- a/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap
+++ b/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap
@@ -11,9 +11,17 @@
                     ]
                 ],
                 "1": [
-                    "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_reference.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
                 ],
                 "2": [
+                    "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "3": [
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "h5ad": [
@@ -27,12 +35,20 @@
                 "obsm": [
                     "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
+                "reference": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_reference.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
                 "versions": [
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-05-19T10:59:10.495670438",
+        "timestamp": "2026-05-28T13:28:41.939214142",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
@@ -68,7 +84,7 @@
                 ],
                 "obsm": [
                     "X_emb",
-                    "X_pca_symphony"
+                    "X_symphony"
                 ],
                 "varm": [
                     
@@ -80,14 +96,51 @@
                     
                 ],
                 "uns": [
-                    "symphony"
+                    
+                ]
+            },
+            {
+                "n_obs": 0,
+                "n_vars": 9887,
+                "obs": {
+                    "index": "_index",
+                    "columns": [
+                        
+                    ]
+                },
+                "var": {
+                    "index": "_index",
+                    "columns": [
+                        "highly_variable",
+                        "mean",
+                        "std"
+                    ]
+                },
+                "layers": [
+                    
+                ],
+                "obsm": [
+                    
+                ],
+                "varm": [
+                    "PCs"
+                ],
+                "obsp": [
+                    
+                ],
+                "varp": [
+                    
+                ],
+                "uns": [
+                    "harmony",
+                    "normalize"
                 ]
             }
         ],
-        "timestamp": "2026-05-26T17:36:49.568823",
+        "timestamp": "2026-05-28T14:41:42.365043934",
         "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "26.04.2"
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.0"
         }
     }
 }
\ No newline at end of file
diff --git a/modules/local/symphony/mapembedding/environment.yml b/modules/local/symphony/mapembedding/environment.yml
new file mode 100644
index 00000000..1e4070eb
--- /dev/null
+++ b/modules/local/symphony/mapembedding/environment.yml
@@ -0,0 +1,10 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::python=3.13.13
+  - conda-forge::pyyaml=6.0.3
+  - conda-forge::scanpy=1.12.1
+  - pip
+  - pip:
+      - symphonypy==0.2.4
diff --git a/modules/local/symphony/mapembedding/main.nf b/modules/local/symphony/mapembedding/main.nf
new file mode 100644
index 00000000..36bd70ce
--- /dev/null
+++ b/modules/local/symphony/mapembedding/main.nf
@@ -0,0 +1,35 @@
+process SYMPHONY_MAPEMBEDDING {
+    tag "${meta.id}"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+            ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/51/512121548a21b4d1bb8acfd5e30a75c5c2103ddd00cf1de4713c682b7e6b5387/data'
+            : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:2198c27c5c9392d5'}"
+
+    input:
+    tuple val(meta), path(h5ad)
+    tuple val(meta2), path(reference_h5ad, stageAs: 'reference/reference.h5ad')
+    val(batch_col)
+    val(counts_layer)
+
+    output:
+    tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
+    path "X_${prefix}.pkl"                 , emit: obsm
+    path "versions.yml"                    , emit: versions, topic: versions
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    if ("${prefix}.h5ad" == "${h5ad}") {
+        error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    }
+    template('map_embedding.py')
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.h5ad
+    touch X_${prefix}.pkl
+    touch versions.yml
+    """
+}
diff --git a/modules/local/symphony/mapembedding/templates/map_embedding.py b/modules/local/symphony/mapembedding/templates/map_embedding.py
new file mode 100644
index 00000000..4ade854e
--- /dev/null
+++ b/modules/local/symphony/mapembedding/templates/map_embedding.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+import os
+
+os.environ["KMP_AFFINITY"] = "disabled"
+os.environ["MPLCONFIGDIR"] = "./tmp/mpl"
+os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
+
+import importlib.metadata
+import platform
+
+import pandas as pd
+import scanpy as sc
+import symphonypy as sp
+import yaml
+from threadpoolctl import threadpool_limits
+
+
+threadpool_limits(int("${task.cpus}"))
+
+adata = sc.read_h5ad("${h5ad}")
+adata_proc = adata.copy()
+adata_ref = sc.read_h5ad("reference/reference.h5ad")
+prefix = "${prefix}"
+batch_col = "${batch_col}"
+counts_layer = "${counts_layer}"
+
+if counts_layer != "X":
+    adata_proc.X = adata_proc.layers[counts_layer]
+
+target_sum = float(adata_ref.uns["normalize"]["target_sum"])
+sc.pp.normalize_total(adata_proc, target_sum=target_sum)
+sc.pp.log1p(adata_proc)
+
+sp.tl.map_embedding(
+    adata_proc,
+    adata_ref,
+    key=batch_col,
+    transferred_adjusted_basis="X_symphony",
+    use_genes_column="highly_variable",
+)
+
+adata.obsm["X_symphony"] = adata_proc.obsm["X_symphony"]
+adata.obsm["X_emb"] = adata_proc.obsm["X_symphony"]
+
+adata.write_h5ad(f"{prefix}.h5ad")
+pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names).to_pickle(f"X_{prefix}.pkl")
+
+versions = {
+    "${task.process}": {
+        "python": platform.python_version(),
+        "scanpy": importlib.metadata.version("scanpy"),
+        "symphonypy": importlib.metadata.version("symphonypy"),
+        "pandas": pd.__version__,
+    }
+}
+
+with open("versions.yml", "w") as f:
+    yaml.dump(versions, f)
diff --git a/modules/local/symphony/mapembedding/tests/main.nf.test b/modules/local/symphony/mapembedding/tests/main.nf.test
new file mode 100644
index 00000000..e51fe09e
--- /dev/null
+++ b/modules/local/symphony/mapembedding/tests/main.nf.test
@@ -0,0 +1,93 @@
+nextflow_process {
+
+    name "Test Process SYMPHONY_MAPEMBEDDING"
+    script "modules/local/symphony/mapembedding/main.nf"
+    process "SYMPHONY_MAPEMBEDDING"
+
+    tag "modules"
+    tag "modules_local"
+
+    setup {
+        run("SYMPHONY_HARMONYINTEGRATE") {
+            script "modules/local/symphony/harmonyintegrate/main.nf"
+            process {
+                """
+                input[0] = channel.of([
+                        [ id: 'harmony' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
+                input[1] = "sample"
+                input[2] = "X"
+                """
+            }
+        }
+    }
+
+    test("Should run without failures") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = channel.of([
+                        [ id: 'harmony' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
+                input[1] = SYMPHONY_HARMONYINTEGRATE.out.reference
+                input[2] = "sample"
+                input[3] = "X"
+                """
+            }
+        }
+
+        then {
+            def adata = anndata(process.out.h5ad[0][1])
+            assert process.success
+            assert "X_emb" in adata.obsm
+            assert "X_symphony" in adata.obsm
+            assert snapshot(
+                path(process.out.versions[0]).yaml,
+                adata.yaml
+            ).match()
+        }
+
+    }
+
+    test("Should run without failures - stub") {
+
+        options '-stub'
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = channel.of([
+                        [ id: 'harmony' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
+                input[1] = channel.of([
+                        [ id: 'harmony' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
+                input[2] = "sample"
+                input[3] = "X"
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assert snapshot(process.out).match()
+        }
+
+    }
+
+}
diff --git a/modules/local/symphony/mapembedding/tests/main.nf.test.snap b/modules/local/symphony/mapembedding/tests/main.nf.test.snap
new file mode 100644
index 00000000..f46df979
--- /dev/null
+++ b/modules/local/symphony/mapembedding/tests/main.nf.test.snap
@@ -0,0 +1,93 @@
+{
+    "Should run without failures - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "harmony"
+                        },
+                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "2": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "harmony"
+                        },
+                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "obsm": [
+                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            }
+        ],
+        "timestamp": "2026-05-28T08:08:04.937392964",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.0"
+        }
+    },
+    "Should run without failures": {
+        "content": [
+            {
+                "SYMPHONY_MAPEMBEDDING": {
+                    "pandas": "2.3.3",
+                    "python": "3.13.13",
+                    "scanpy": "1.12.1",
+                    "symphonypy": "0.2.4"
+                }
+            },
+            {
+                "n_obs": 38234,
+                "n_vars": 9887,
+                "obs": {
+                    "index": "_index",
+                    "columns": [
+                        "sample"
+                    ]
+                },
+                "var": {
+                    "index": "_index",
+                    "columns": [
+                        
+                    ]
+                },
+                "layers": [
+                    
+                ],
+                "obsm": [
+                    "X_emb",
+                    "X_symphony"
+                ],
+                "varm": [
+                    
+                ],
+                "obsp": [
+                    
+                ],
+                "varp": [
+                    
+                ],
+                "uns": [
+                    
+                ]
+            }
+        ],
+        "timestamp": "2026-05-28T13:56:04.774526372",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.0"
+        }
+    }
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 1774ba60..f2d6c135 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -43,6 +43,7 @@ params {
     scvi_model                    = null
     scanvi_model                  = null
     scimilarity_model             = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz'
+    harmony_reference             = null
     expimap_gmt                   = null
 
     // Extension options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index be1d18d2..43b1d63f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -167,6 +167,14 @@
                     "description": "Optional file containing a list of gene symbols (one per line). If provided, these genes will be excluded from highly variable genes selection for integration.",
                     "exists": true
                 },
+                "harmony_reference": {
+                    "type": "string",
+                    "format": "file-path",
+                    "description": "Path to a Symphony reference AnnData, only relevant if Harmony is selected in `integration_methods`. If provided, query cells will be mapped onto this reference instead of running de novo Harmony integration.",
+                    "help_text": "The file should be in the .h5ad format. It is produced by a prior de novo Harmony run as `{outdir}/combine/integrate/harmony/harmony_reference.h5ad` and contains the compact Symphony reference metadata required for query mapping. Required for Harmony reference mapping and when extending an atlas with `--base_adata`.",
+                    "pattern": "^\\S+\\.h5ad$",
+                    "exists": true
+                },
                 "scvi_model": {
                     "type": "string",
                     "format": "file-path",
diff --git a/subworkflows/local/combine/main.nf b/subworkflows/local/combine/main.nf
index 1c150fb2..80ec76c1 100644
--- a/subworkflows/local/combine/main.nf
+++ b/subworkflows/local/combine/main.nf
@@ -17,6 +17,7 @@ workflow COMBINE {
     scvi_categorical_covariates //   value: string
     scvi_continuous_covariates  //   value: string
     scimilarity_model           //   value: string
+    harmony_reference           //   value: file or null
     expimap_gmt                 //   value: string
     condition_col               //   value: string
     scib                        //   value: boolean
@@ -52,6 +53,7 @@ workflow COMBINE {
         scvi_categorical_covariates,
         scvi_continuous_covariates,
         scimilarity_model,
+        harmony_reference,
         expimap_gmt,
         condition_col
     )
diff --git a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test
index 0a59e5fb..e01cd89a 100644
--- a/subworkflows/local/integrate/tests/main.nf.test
+++ b/subworkflows/local/integrate/tests/main.nf.test
@@ -31,7 +31,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -65,7 +66,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -74,8 +76,7 @@ nextflow_workflow {
             def adata = anndata(workflow.out.integrations[0][1])
             assert workflow.success
             assert "X_emb" in adata.obsm
-            assert "X_pca_symphony" in adata.obsm
-            assert "symphony" in adata.uns
+            assert "X_symphony" in adata.obsm
             assert snapshot(
                 workflow.out.versions,
                 adata.yaml
@@ -108,7 +109,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -142,7 +144,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -187,7 +190,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -221,7 +225,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -262,7 +267,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -298,7 +304,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -332,7 +339,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'condition'
+                input[11] = null
+                input[12] = 'condition'
                 """
             }
         }
@@ -373,7 +381,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'batch'
+                input[11] = null
+                input[12] = 'batch'
                 """
             }
         }
@@ -407,7 +416,8 @@ nextflow_workflow {
                 input[8] = []
                 input[9] = null
                 input[10] = null
-                input[11] = 'batch'
+                input[11] = null
+                input[12] = 'batch'
                 """
             }
         }
diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
index 6d25a1e1..5f4ba6eb 100644
--- a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
@@ -181,8 +181,8 @@ def validateInputParameters() {
     }
 
     def integration_methods = params.integration_methods.split(',').collect { it -> it.trim().toLowerCase() }
-    if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity']).size() > 0) {
-        throw new Exception("Only scvi, scanvi and scimilarity integration methods are supported if base_adata is provided")
+    if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity', 'harmony']).size() > 0) {
+        throw new Exception("Only scvi, scanvi, scimilarity and harmony integration methods are supported if base_adata is provided")
     }
 
     if (params.base_adata && 'scvi' in integration_methods && !params.scvi_model) {
@@ -197,6 +197,10 @@ def validateInputParameters() {
         throw new Exception("If base_adata is provided and scimilarity is used as integration method, scimilarity_model must be provided.")
     }
 
+    if (params.base_adata && 'harmony' in integration_methods && !params.harmony_reference) {
+        throw new Exception("If base_adata is provided and harmony is used as integration method, harmony_reference must be provided.")
+    }
+
     // Validate sample_n and sample_fraction parameters
     if (params.sample_n && params.sample_fraction) {
         throw new Exception("Both sample_n and sample_fraction are set. Please use only one of them.")
diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test
index 083adbb7..365d4b30 100644
--- a/tests/main_pipeline_extend.nf.test
+++ b/tests/main_pipeline_extend.nf.test
@@ -4,17 +4,18 @@ nextflow_pipeline {
     script "main.nf"
     tag "pipeline"
 
-    test("Should perform scvi reference extension") {
+    test("Should perform scvi and harmony reference extension") {
 
         when {
             params {
                 input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv'
-                integration_methods = 'scvi'
+                integration_methods = 'scvi,harmony'
                 doublet_detection   = 'scrublet,scdblfinder'
                 celltypist_model    = 'Adult_COVID19_PBMC'
                 integration_hvgs    = 500
                 outdir              = "$outputDir"
                 scvi_model          = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt'
+                harmony_reference   = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad'
                 base_adata          = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/merged.h5ad'
             }
         }
diff --git a/tests/main_pipeline_extend.nf.test.snap b/tests/main_pipeline_extend.nf.test.snap
index 63c6d111..add7d274 100644
--- a/tests/main_pipeline_extend.nf.test.snap
+++ b/tests/main_pipeline_extend.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "Should perform scvi reference extension": {
+    "Should perform scvi and harmony reference extension": {
         "content": [
             {
                 "ADATA_EXTEND": {
@@ -144,6 +144,12 @@
                 "SCVITOOLS_SCVI": {
                     "scvi": "1.4.3"
                 },
+                "SYMPHONY_MAPEMBEDDING": {
+                    "pandas": "2.3.3",
+                    "python": "3.13.13",
+                    "scanpy": "1.12.1",
+                    "symphonypy": "0.2.4"
+                },
                 "UMAP": {
                     "pandas": "2.3.3",
                     "python": "3.13.12",
@@ -175,6 +181,14 @@
                 "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf",
                 "celltypes/singler/SRR28679759_singler_predictions.csv",
                 "cluster_dimred",
+                "cluster_dimred/harmony",
+                "cluster_dimred/harmony/entropy",
+                "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png",
+                "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png",
+                "cluster_dimred/harmony/leiden",
+                "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png",
+                "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png",
+                "cluster_dimred/harmony/umap",
                 "cluster_dimred/scvi",
                 "cluster_dimred/scvi/entropy",
                 "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png",
@@ -186,6 +200,7 @@
                 "combine",
                 "combine/integrate",
                 "combine/integrate/scib_metrics",
+                "combine/integrate/scib_metrics/harmony_metrics.tsv",
                 "combine/integrate/scib_metrics/scvi_metrics.tsv",
                 "combine/integrate/scvi",
                 "combine/integrate/scvi/scvi_model",
@@ -228,10 +243,10 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-11T23:24:27.061703025",
+        "timestamp": "2026-05-28T11:14:08.38204652",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
         }
     }
-}
\ No newline at end of file
+}
diff --git a/tests/main_pipeline_reference_mapping.nf.test b/tests/main_pipeline_reference_mapping.nf.test
index 9292b3b7..93d1b10f 100644
--- a/tests/main_pipeline_reference_mapping.nf.test
+++ b/tests/main_pipeline_reference_mapping.nf.test
@@ -4,17 +4,18 @@ nextflow_pipeline {
     script "main.nf"
     tag "pipeline"
 
-    test("Should perform scvi reference mapping") {
+    test("Should perform scvi and harmony reference mapping") {
 
         when {
             params {
                 input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv'
-                integration_methods = 'scvi'
+                integration_methods = 'scvi,harmony'
                 doublet_detection   = 'scrublet,scdblfinder'
                 celltypist_model    = 'Adult_COVID19_PBMC'
                 integration_hvgs    = 500
                 outdir              = "$outputDir"
                 scvi_model          = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt'
+                harmony_reference   = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad'
             }
         }
 
diff --git a/tests/main_pipeline_reference_mapping.nf.test.snap b/tests/main_pipeline_reference_mapping.nf.test.snap
index 5812c99c..8353f4a6 100644
--- a/tests/main_pipeline_reference_mapping.nf.test.snap
+++ b/tests/main_pipeline_reference_mapping.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "Should perform scvi reference mapping": {
+    "Should perform scvi and harmony reference mapping": {
         "content": [
             {
                 "ADATA_EXTEND": {
@@ -144,6 +144,12 @@
                 "SCVITOOLS_SCVI": {
                     "scvi": "1.4.3"
                 },
+                "SYMPHONY_MAPEMBEDDING": {
+                    "pandas": "2.3.3",
+                    "python": "3.13.13",
+                    "scanpy": "1.12.1",
+                    "symphonypy": "0.2.4"
+                },
                 "UMAP": {
                     "pandas": "2.3.3",
                     "python": "3.13.12",
@@ -175,6 +181,14 @@
                 "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf",
                 "celltypes/singler/SRR28679759_singler_predictions.csv",
                 "cluster_dimred",
+                "cluster_dimred/harmony",
+                "cluster_dimred/harmony/entropy",
+                "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png",
+                "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png",
+                "cluster_dimred/harmony/leiden",
+                "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png",
+                "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png",
+                "cluster_dimred/harmony/umap",
                 "cluster_dimred/scvi",
                 "cluster_dimred/scvi/entropy",
                 "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png",
@@ -186,6 +200,7 @@
                 "combine",
                 "combine/integrate",
                 "combine/integrate/scib_metrics",
+                "combine/integrate/scib_metrics/harmony_metrics.tsv",
                 "combine/integrate/scib_metrics/scvi_metrics.tsv",
                 "combine/integrate/scvi",
                 "combine/integrate/scvi/scvi_model",
@@ -228,10 +243,10 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-12T08:53:40.785735333",
+        "timestamp": "2026-05-28T10:56:43.387329548",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
         }
     }
-}
\ No newline at end of file
+}
diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf
index 82579603..2528f2e7 100644
--- a/workflows/scdownstream.nf
+++ b/workflows/scdownstream.nf
@@ -59,6 +59,7 @@ workflow SCDOWNSTREAM {
     scvi_categorical_covariates   //   value: string
     scvi_continuous_covariates    //   value: string
     scimilarity_model             //   value: string
+    harmony_reference             //   value: string
     expimap_gmt                   //   value: string
     skip_liana                    //   value: boolean
     skip_rankgenesgroups          //   value: boolean
@@ -178,6 +179,7 @@ workflow SCDOWNSTREAM {
                 scvi_categorical_covariates,
                 scvi_continuous_covariates,
                 scimilarity_model,
+                harmony_reference,
                 expimap_gmt,
                 condition_col,
                 scib,

From 18e6d6579f3ba8c2205a5c7fbaa6fcb5ac21e36c Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Fri, 29 May 2026 19:20:37 +0200
Subject: [PATCH 06/19] Remove local contrib test-datasets helper after
 upstream merge.

The extension_base artifacts now live in nf-core/test-datasets, so the temporary build and collection scripts are no longer needed in this repo.
---
 contrib/nf-core-test-datasets/.gitattributes  |  1 -
 contrib/nf-core-test-datasets/README.md       | 49 -------------------
 .../nf-core-test-datasets/build.params.json   |  8 ---
 .../collect-artifacts.sh                      | 15 ------
 4 files changed, 73 deletions(-)
 delete mode 100644 contrib/nf-core-test-datasets/.gitattributes
 delete mode 100644 contrib/nf-core-test-datasets/README.md
 delete mode 100644 contrib/nf-core-test-datasets/build.params.json
 delete mode 100755 contrib/nf-core-test-datasets/collect-artifacts.sh

diff --git a/contrib/nf-core-test-datasets/.gitattributes b/contrib/nf-core-test-datasets/.gitattributes
deleted file mode 100644
index 914d733f..00000000
--- a/contrib/nf-core-test-datasets/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text
diff --git a/contrib/nf-core-test-datasets/README.md b/contrib/nf-core-test-datasets/README.md
deleted file mode 100644
index b9b58a05..00000000
--- a/contrib/nf-core-test-datasets/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# nf-core/test-datasets update — `scdownstream/extension_base`
-
-Copy the contents of `extension_base/` into the **`scdownstream` branch** of [nf-core/test-datasets](https://github.com/nf-core/test-datasets), replacing the existing files in `scdownstream/extension_base/`.
-
-## Files
-
-| File                                    | Purpose                                                      |
-| --------------------------------------- | ------------------------------------------------------------ |
-| `extension_base/model.pt`               | scVI checkpoint for reference mapping / extension            |
-| `extension_base/merged.h5ad`            | Finalized atlas (`base_adata`) for extension                 |
-| `extension_base/harmony_reference.h5ad` | Symphony reference for Harmony reference mapping / extension |
-
-All three must come from the **same pipeline run** (see below).
-
-> **Note:** If `extension_base/` already contains files but you have not run `collect-artifacts.sh` yet, `merged.h5ad` and `model.pt` may still be the current test-datasets versions and `harmony_reference.h5ad` from a Harmony-only build. **Do not open the test-datasets PR until you have run a unified build and `collect-artifacts.sh`** — all three files must be replaced together.
-
-## How these were generated
-
-```bash
-# From the scdownstream repo root, with nf-core conda env active:
-nextflow run main.nf -profile test,apptainer -params-file contrib/nf-core-test-datasets/build.params.json
-
-# Populate extension_base/ from the build output:
-./contrib/nf-core-test-datasets/collect-artifacts.sh
-```
-
-Build parameters match the consolidated pipeline tests (`main_pipeline_reference_mapping.nf.test`, `main_pipeline_extend.nf.test`):
-
-- Input: `samplesheet.csv` (full atlas)
-- Integration: `scvi,harmony`
-- HVGs: 500
-
-## PR checklist (test-datasets repo)
-
-1. Check out branch `scdownstream`.
-2. Ensure Git LFS is enabled (`git lfs install`).
-3. Copy `extension_base/*` into `scdownstream/extension_base/` (overwrite `model.pt` and `merged.h5ad`, add `harmony_reference.h5ad`).
-4. Add or extend `.gitattributes` on the test-datasets repo:
-
-   ```
-   scdownstream/extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text
-   ```
-
-5. Commit and open PR against `scdownstream`.
-6. After merge, re-run `nftu` on the scdownstream pipeline reference-mapping and extend tests.
-
-## Pipeline version
-
-Record the scdownstream commit/tag used when generating these files in your PR description.
diff --git a/contrib/nf-core-test-datasets/build.params.json b/contrib/nf-core-test-datasets/build.params.json
deleted file mode 100644
index 8faa4a16..00000000
--- a/contrib/nf-core-test-datasets/build.params.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-    "input": "https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet.csv",
-    "integration_methods": "scvi,harmony",
-    "integration_hvgs": 500,
-    "doublet_detection": "scrublet,scdblfinder",
-    "celltypist_model": "Adult_COVID19_PBMC",
-    "outdir": "contrib/nf-core-test-datasets/build_output"
-}
diff --git a/contrib/nf-core-test-datasets/collect-artifacts.sh b/contrib/nf-core-test-datasets/collect-artifacts.sh
deleted file mode 100755
index 75fa3d4e..00000000
--- a/contrib/nf-core-test-datasets/collect-artifacts.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
-OUT="${ROOT}/contrib/nf-core-test-datasets/build_output"
-DEST="${ROOT}/contrib/nf-core-test-datasets/extension_base"
-
-mkdir -p "${DEST}"
-
-cp "${OUT}/combine/integrate/scvi/scvi_model/model.pt" "${DEST}/model.pt"
-cp "${OUT}/finalized/merged.h5ad" "${DEST}/merged.h5ad"
-cp "${OUT}/combine/integrate/harmony/harmony_reference.h5ad" "${DEST}/harmony_reference.h5ad"
-
-echo "Collected artifacts into ${DEST}:"
-ls -lh "${DEST}"

From 00b6c170fc30f2f776f61a8fefd57960c790d784 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Fri, 29 May 2026 19:23:00 +0200
Subject: [PATCH 07/19] Rename harmony integration method to symphony.

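Users now select `symphony` (previously `harmony`) in `integration_methods` and pass `symphony_reference` (previously `harmony_reference`) for query mapping; outputs are published under `combine/integrate/symphony`, and the exported reference is named `symphony_reference.h5ad`.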
---
 README.md                                          |  2 +-
 assets/multiqc_config.yml                          |  2 +-
 conf/modules.config                                |  6 +++---
 conf/test.config                                   |  2 +-
 conf/test_full.config                              |  2 +-
 docs/output.md                                     |  4 ++--
 docs/reproducibility.md                            | 12 ++++++------
 docs/usage.md                                      | 14 +++++++-------
 main.nf                                            | 10 +++++-----
 .../adata/prepcellxgene/templates/prepcellxgene.py |  2 +-
 .../local/symphony/mapembedding/tests/main.nf.test |  8 ++++----
 nextflow.config                                    |  2 +-
 nextflow_schema.json                               | 14 +++++++-------
 subworkflows/local/cluster/tests/main.nf.test      |  2 +-
 subworkflows/local/combine/main.nf                 |  4 ++--
 subworkflows/local/integrate/main.nf               | 12 ++++++------
 subworkflows/local/integrate/tests/main.nf.test    | 10 +++++-----
 .../utils_nfcore_scdownstream_pipeline/main.nf     |  8 ++++----
 tests/main_pipeline_build.nf.test                  |  2 +-
 workflows/scdownstream.nf                          |  4 ++--
 20 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/README.md b/README.md
index 8433a1fd..d45ee4ae 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ Steps marked with the boat icon are not yet implemented. For the other steps, th
    3. Integration
       - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)
       - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)
-      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/))
+      - [Symphony](https://github.com/immunogenomics/symphony) / Harmony (via [symphonypy](https://pypi.org/project/symphonypy/))
       - [BBKNN](https://github.com/Teichlab/bbknn)
       - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)
       - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 945264e4..976136b4 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -18,7 +18,7 @@ report_section_order:
     order: -1005
   "scanvi":
     order: -1006
-  "harmony":
+  "symphony":
     order: -1007
   "bbknn":
     order: -1009
diff --git a/conf/modules.config b/conf/modules.config
index da4a96cb..4260f400 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -381,11 +381,11 @@ process {
 
     withName: SYMPHONY_HARMONYINTEGRATE {
         publishDir = [
-            path: { "${params.outdir}/combine/integrate/harmony" },
+            path: { "${params.outdir}/combine/integrate/symphony" },
             mode: params.publish_dir_mode,
             saveAs: { filename ->
                 if (filename.endsWith('_reference.h5ad')) {
-                    return 'harmony_reference.h5ad'
+                    return 'symphony_reference.h5ad'
                 }
                 if (params.save_intermediates && !filename.equals('versions.yml')) {
                     return filename
@@ -397,7 +397,7 @@ process {
 
     withName: SYMPHONY_MAPEMBEDDING {
         publishDir = [
-            path: { "${params.outdir}/combine/integrate/harmony" },
+            path: { "${params.outdir}/combine/integrate/symphony" },
             mode: params.publish_dir_mode,
             enabled: params.save_intermediates,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
diff --git a/conf/test.config b/conf/test.config
index 31b5a05b..fc2ae4f0 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -24,7 +24,7 @@ params {
 
     // Input data
     input               = params.pipelines_testdata_base_path + 'samplesheet.csv'
-    integration_methods = 'scvi,harmony,bbknn,combat'
+    integration_methods = 'scvi,symphony,bbknn,combat'
     doublet_detection   = 'solo,scrublet,scdblfinder'
     celltypist_model    = 'Adult_Human_Skin'
     celldex_reference   = 'https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/singleR/references.csv'
diff --git a/conf/test_full.config b/conf/test_full.config
index f4629109..96e64421 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -24,7 +24,7 @@ params {
 
     // Input data for full size test
     input               = params.pipelines_testdata_base_path + 'samplesheet.csv'
-    integration_methods = 'scvi,harmony,bbknn,combat'
+    integration_methods = 'scvi,symphony,bbknn,combat'
     doublet_detection   = 'solo,scrublet,doubletdetection,scdblfinder'
     celltypist_model    = 'Adult_Human_Skin'
     celldex_reference   = 'https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/singleR/references.csv'
diff --git a/docs/output.md b/docs/output.md
index 38d7356b..97403fec 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -34,7 +34,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
    3. Integration
       - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)
       - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)
-      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/))
+      - [Symphony](https://github.com/immunogenomics/symphony) / Harmony (via [symphonypy](https://pypi.org/project/symphonypy/))
       - [BBKNN](https://github.com/Teichlab/bbknn)
       - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)
       - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)
@@ -98,7 +98,7 @@ The `preprocess` directory contains a subdirectory for each sample, which contai
     - `${tool}`
       - `*.h5ad/*.rds`: The integrated H5AD or RDS file.
       - `X_${tool}.pkl`: Low-dimensional representation of the integrated data.
-      - `harmony_reference.h5ad` (Harmony only): Compact Symphony reference AnnData for query mapping, published from de novo Harmony runs.
+      - `symphony_reference.h5ad` (Symphony only): Compact Symphony reference AnnData for query mapping, published from de novo Symphony runs.
 
 </details>
 
diff --git a/docs/reproducibility.md b/docs/reproducibility.md
index 78769582..f93ff464 100644
--- a/docs/reproducibility.md
+++ b/docs/reproducibility.md
@@ -139,10 +139,10 @@ The **Test strategy (this branch)** column describes what the tests on this bran
 
 ### `symphony/`
 
-| Module                      | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                         | Reproducibility                                                                                                               | Test strategy (this branch)         |
-| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- |
-| `symphony/harmonyintegrate` | Runs Harmony batch integration via symphonypy after normalize_total → log1p → scale(max_value=10) → PCA(zero_center=False), storing `X_symphony`, `X_emb`, Symphony reference metadata (`var` mean/std/HVG, `varm['PCs']`, `uns['harmony']`, `uns['normalize']`), and publishing a compact `harmony_reference.h5ad`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only |
-| `symphony/mapembedding`     | Maps query cells onto a Symphony reference via symphonypy `map_embedding`, storing mapped coordinates in `X_symphony` and `X_emb`.                                                                                                                                                                                                                                                                                                                                                  | **Non-deterministic** — inherits Harmony mapping variability.                                                                 | structural — versions + schema only |
+| Module                      | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | Reproducibility                                                                                                               | Test strategy (this branch)         |
+| --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- |
+| `symphony/harmonyintegrate` | Runs Symphony integration via symphonypy/Harmony after normalize_total → log1p → scale(max_value=10) → PCA(zero_center=False), storing `X_symphony`, `X_emb`, Symphony reference metadata (`var` mean/std/HVG, `varm['PCs']`, `uns['harmony']`, `uns['normalize']`), and publishing a compact `symphony_reference.h5ad`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only |
+| `symphony/mapembedding`     | Maps query cells onto a Symphony reference via symphonypy `map_embedding`, storing mapped coordinates in `X_symphony` and `X_emb`.                                                                                                                                                                                                                                                                                                                                                      | **Non-deterministic** — inherits Symphony/Harmony mapping variability.                                                        | structural — versions + schema only |
 
 ### `scimilarity/`
 
@@ -186,11 +186,11 @@ The **Test strategy (this branch)** column describes what the tests on this bran
 | `ambient_correction`      | Dispatches ambient RNA correction to decontX, SoupX, or none based on a parameter.                                                                                                          | **Non-deterministic** for decontX (no seed) and SoupX (seeded clustering but variable results); fully deterministic for the `none` passthrough.                                                               | **Scenario-dependent:** often `versions` as YAML + `adata.yaml` when an H5AD is produced; **`none` / meta-disabled** paths may snapshot **only `versions` + `workflow.out.h5ad.size()`** (counts, not hashes). |
 | `celltype_assignment`     | Orchestrates cell type annotation by running SingleR and/or CellTypist.                                                                                                                     | Fully deterministic at inference time for both methods.                                                                                                                                                       | **`workflow.out.versions` + `workflow.out.obs.size()`** for non-stub tests; separate **stub** test exercises subworkflow wiring.                                                                               |
 | `cluster`                 | Full clustering pipeline: neighbours → UMAP → Leiden at multiple resolutions → Shannon entropy.                                                                                             | Seeded / quasi-deterministic for UMAP; **non-deterministic** due to unseeded Leiden.                                                                                                                          | structural — **`workflow.out.versions` only** (each as YAML); graph / embedding presence asserted in code outside `snapshot`.                                                                                  |
-| `combine`                 | Merges all samples and runs all configured integration methods.                                                                                                                             | Inherits from constituent modules — ranges from fully deterministic (no integration) to seeded/quasi-deterministic (scVI, Harmony, Seurat).                                                                   | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on merged H5AD.                                                                                                                                 |
+| `combine`                 | Merges all samples and runs all configured integration methods.                                                                                                                             | Inherits from constituent modules — ranges from fully deterministic (no integration) through seeded/quasi-deterministic (scVI, Seurat) to non-deterministic (Symphony).                                      | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on merged H5AD.                                                                                                                                 |
 | `differential_expression` | Runs rank-genes-groups DE analysis across all combinations of clustering labels, conditions, and cell-type subsets.                                                                         | Fully deterministic for the default wilcoxon/t-test methods.                                                                                                                                                  | structural — **`workflow.out.versions` only** (YAML); DE / MultiQC presence asserted outside `snapshot` where needed.                                                                                          |
 | `doublet_detection`       | Runs one or more doublet-detection methods (scdblfinder, solo, scrublet, doubletdetection) and removes called doublets.                                                                     | **Non-deterministic** — solo, scrublet, and doubletdetection have stochastic components; scdblfinder is seeded.                                                                                               | structural + **range assertion** on **`n_obs`**; snapshot uses **`versions` (YAML) + `adata.yaml`**.                                                                                                           |
 | `finalize`                | Assembles the final AnnData by extending it with all collected obs/obsm/uns/layers outputs.                                                                                                 | Fully deterministic                                                                                                                                                                                           | hash — **`workflow.out.h5ad` + `workflow.out.versions` (YAML) + `adata.yaml`** — not a bare `snapshot(workflow.out)` in non-stub tests.                                                                        |
-| `integrate`               | Applies HVG selection then one or more integration methods (scVI, scANVI, Harmony, BBKNN, ComBat, Seurat, SCimilarity, PCA, EXPIMAP).                                                       | Seeded / quasi-deterministic for scVI/scANVI/ComBat/Seurat/BBKNN/PCA; **non-deterministic** for Harmony and EXPIMAP (iterative training).                                                                     | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on integration H5AD (e.g. Harmony / BBKNN / ComBat / PCA tests).                                                                                |
+| `integrate`               | Applies HVG selection then one or more integration methods (scVI, scANVI, Symphony, BBKNN, ComBat, Seurat, SCimilarity, PCA, EXPIMAP).                                                      | Seeded / quasi-deterministic for scVI/scANVI/ComBat/Seurat/BBKNN/PCA; **non-deterministic** for Symphony and EXPIMAP (iterative training).                                                                    | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on integration H5AD (e.g. Symphony / BBKNN / ComBat / PCA tests).                                                                               |
 | `load_h5ad`               | Loads input files in H5AD, 10x H5, RDS, or CSV format and converts all to AnnData H5AD.                                                                                                     | Fully deterministic                                                                                                                                                                                           | hash — **`snapshot(workflow.out)` only** (passthrough-safe; avoids `anndata().yaml` on unstaged inputs per nf-test rules).                                                                                     |
 | `per_group`               | Runs PAGA, LIANA rank-aggregate, rank-genes DE, and optional CyteType per cluster grouping.                                                                                                 | **Seeded / quasi-deterministic** — inherits from constituent modules; CyteType is non-deterministic when enabled.                                                                                             | structural — **`workflow.out.versions` only** (YAML); optional `workflow.out.obs.size()` when CyteType is enabled.                                                                                             |
 | `pseudobulking`           | Aggregates single-cell data into pseudobulk profiles grouped by specified metadata columns.                                                                                                 | Fully deterministic                                                                                                                                                                                           | hash — **`workflow.out` + `versions` (YAML) + `adata.yaml`** on pseudobulk H5AD.                                                                                                                               |
diff --git a/docs/usage.md b/docs/usage.md
index 6a387656..a7dcc04a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -216,16 +216,16 @@ nextflow run nf-core/scdownstream --input samplesheet.csv --outdir results \
 ### Reference mapping and extension
 
 **Reference mapping** means **mapping new cells into a latent space using a pre-trained model** instead of training that integration step only on the query data.
-In this pipeline this can be done using **scVI**, **scANVI**, **scimilarity**, and **Harmony (Symphony)**.
-To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, `scimilarity`, and/or `harmony`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), and [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text).
+In this pipeline this can be done using **scVI**, **scANVI**, **scimilarity**, and **Symphony**.
+To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, `scimilarity`, and/or `symphony`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), and [`symphony_reference`](https://nf-co.re/scdownstream/parameters#symphony_reference) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text).
 
-For Harmony reference mapping, provide the compact Symphony reference AnnData from a prior de novo run (`{outdir}/combine/integrate/harmony/harmony_reference.h5ad`). It contains the gene statistics, PCA loadings, Harmony centroids, and normalization metadata required for query mapping.
+For Symphony reference mapping, provide the compact Symphony reference AnnData from a prior de novo run (`{outdir}/combine/integrate/symphony/symphony_reference.h5ad`). It contains the gene statistics, PCA loadings, Harmony centroids, and normalization metadata required for query mapping.
 
 **Extension** is for users that have outputs of a previous run of `nf-core/scdownstream` and want to extend it with new data, without re-running the integration from scratch.
-It only works if `scvi`, `scanvi`, `scimilarity`, and/or `harmony` have been enabled in `integration_methods` in the original pipeline run.
+It only works if `scvi`, `scanvi`, `scimilarity`, and/or `symphony` have been enabled in `integration_methods` in the original pipeline run.
 Other integration methods than the four mentioned before are not supported for this.
 In simple terms, in this setup the workflow is: (1) project new data into the latent space learned from the data in the original run, and then (2) combine the datasets.
-For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference)).
+For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), [`symphony_reference`](https://nf-co.re/scdownstream/parameters#symphony_reference)).
 For (2), pass the integrated `.h5ad` from the original run as [`base_adata`](https://nf-co.re/scdownstream/parameters#base_adata).
 
 Pre-trained scVI models are also shared on [scvi-hub](https://huggingface.co/scvi-tools).
@@ -283,11 +283,11 @@ Each row in the CSV selects a subset of clusterings. **All columns are optional*
 
 When multiple rows match a clustering result, their `analyses` lists are **combined** (duplicates removed). If any matching row leaves `analyses` empty, all analyses run for that clustering. Clusterings that match **no** row are excluded from Leiden and all downstream analyses — but their UMAP and neighbour graph are still computed.
 
-Example plan: full analysis on Harmony at resolution 0.5, DE-only at resolution 1.0 for every integration, and DE-only for scVI at any resolution:
+Example plan: full analysis on Symphony at resolution 0.5, DE-only at resolution 1.0 for every integration, and DE-only for scVI at any resolution:
 
 ```csv title="analysis_plan.csv"
 integration,subset,resolution,analyses
-harmony,global,0.5,"paga,de,cytetype"
+symphony,global,0.5,"paga,de,cytetype"
 ,,1.0,de
 scvi,,,de
 ```
diff --git a/main.nf b/main.nf
index 7aa62964..4f032b32 100644
--- a/main.nf
+++ b/main.nf
@@ -61,7 +61,7 @@ workflow NFCORE_SCDOWNSTREAM {
     scvi_categorical_covariates   //   value: string
     scvi_continuous_covariates    //   value: string
     scimilarity_model             //   value: string
-    harmony_reference             //   value: string
+    symphony_reference             //   value: string
     expimap_gmt                   //   value: string
     skip_liana                    //   value: boolean
     skip_rankgenesgroups          //   value: boolean
@@ -118,7 +118,7 @@ workflow NFCORE_SCDOWNSTREAM {
         scvi_categorical_covariates,
         scvi_continuous_covariates,
         scimilarity_model,
-        harmony_reference,
+        symphony_reference,
         expimap_gmt,
         skip_liana,
         skip_rankgenesgroups,
@@ -182,8 +182,8 @@ workflow {
 
     def analysis_plan = analysisPlanToList()
 
-    def harmony_reference = params.harmony_reference
-        ? file(params.harmony_reference, checkIfExists: true)
+    def symphony_reference = params.symphony_reference
+        ? file(params.symphony_reference, checkIfExists: true)
         : null
 
     NFCORE_SCDOWNSTREAM (
@@ -217,7 +217,7 @@ workflow {
         params.scvi_categorical_covariates,
         params.scvi_continuous_covariates,
         params.scimilarity_model,
-        harmony_reference,
+        symphony_reference,
         params.expimap_gmt,
         params.skip_liana,
         params.skip_rankgenesgroups,
diff --git a/modules/local/adata/prepcellxgene/templates/prepcellxgene.py b/modules/local/adata/prepcellxgene/templates/prepcellxgene.py
index a36ffce3..7f00e6c9 100644
--- a/modules/local/adata/prepcellxgene/templates/prepcellxgene.py
+++ b/modules/local/adata/prepcellxgene/templates/prepcellxgene.py
@@ -17,7 +17,7 @@
 
 adata = ad.read_h5ad("${h5ad}")
 
-integration_methods = ["harmony", "scvi", "scanvi", "scimilarity", "seurat", "bbknn", "combat", "pca", "expimap"]
+integration_methods = ["symphony", "scvi", "scanvi", "scimilarity", "seurat", "bbknn", "combat", "pca", "expimap"]
 
 for integration in integration_methods:
     embedding_key = f"X_{integration}"
diff --git a/modules/local/symphony/mapembedding/tests/main.nf.test b/modules/local/symphony/mapembedding/tests/main.nf.test
index e51fe09e..8699ba98 100644
--- a/modules/local/symphony/mapembedding/tests/main.nf.test
+++ b/modules/local/symphony/mapembedding/tests/main.nf.test
@@ -13,7 +13,7 @@ nextflow_process {
             process {
                 """
                 input[0] = channel.of([
-                        [ id: 'harmony' ],
+                        [ id: 'symphony' ],
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
@@ -33,7 +33,7 @@ nextflow_process {
             process {
                 """
                 input[0] = channel.of([
-                        [ id: 'harmony' ],
+                        [ id: 'symphony' ],
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
@@ -68,12 +68,12 @@ nextflow_process {
             process {
                 """
                 input[0] = channel.of([
-                        [ id: 'harmony' ],
+                        [ id: 'symphony' ],
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
                 input[1] = channel.of([
-                        [ id: 'harmony' ],
+                        [ id: 'symphony' ],
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
diff --git a/nextflow.config b/nextflow.config
index f2d6c135..b94a1c3d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -43,7 +43,7 @@ params {
     scvi_model                    = null
     scanvi_model                  = null
     scimilarity_model             = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz'
-    harmony_reference             = null
+    symphony_reference             = null
     expimap_gmt                   = null
 
     // Extension options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 43b1d63f..762c1fc3 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -152,8 +152,8 @@
                     "type": "string",
                     "default": "scvi",
                     "description": "Specify the tool to use for integration",
-                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, bbknn, combat, seurat, scimilarity, pca, expimap",
-                    "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$"
+                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, symphony, bbknn, combat, seurat, scimilarity, pca, expimap",
+                    "pattern": "^((scvi|scanvi|symphony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|symphony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$"
                 },
                 "integration_hvgs": {
                     "type": "integer",
@@ -167,11 +167,11 @@
                     "description": "Optional file containing a list of gene symbols (one per line). If provided, these genes will be excluded from highly variable genes selection for integration.",
                     "exists": true
                 },
-                "harmony_reference": {
+                "symphony_reference": {
                     "type": "string",
                     "format": "file-path",
-                    "description": "Path to a Symphony reference AnnData, only relevant if Harmony is selected in `integration_methods`. If provided, query cells will be mapped onto this reference instead of running de novo Harmony integration.",
-                    "help_text": "The file should be in the .h5ad format. It is produced by a prior de novo Harmony run as `{outdir}/combine/integrate/harmony/harmony_reference.h5ad` and contains the compact Symphony reference metadata required for query mapping. Required for Harmony reference mapping and when extending an atlas with `--base_adata`.",
+                    "description": "Path to a Symphony reference AnnData, only relevant if Symphony is selected in `integration_methods`. If provided, query cells will be mapped onto this reference instead of running de novo Symphony integration.",
+                    "help_text": "The file should be in the .h5ad format. It is produced by a prior de novo Symphony run as `{outdir}/combine/integrate/symphony/symphony_reference.h5ad` and contains the compact Symphony reference metadata required for query mapping. Required for Symphony reference mapping and when extending an atlas with `--base_adata`.",
                     "pattern": "^\\S+\\.h5ad$",
                     "exists": true
                 },
@@ -236,7 +236,7 @@
                     "type": "string",
                     "description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if `input` is not provided - otherwise it is ignored.",
                     "help_text": "If the `input` parameter is not provided (no new data to add), integration will not be performed. In order to be able to utilize existing integration results, you need to provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).",
-                    "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|bbknn|combat|seurat))*)?$"
+                    "pattern": "^((scvi|scanvi|symphony|bbknn|combat|seurat)(,(scvi|scanvi|symphony|bbknn|combat|seurat))*)?$"
                 }
             }
         },
@@ -577,7 +577,7 @@
                     "type": "string",
                     "fa_icon": "far fa-check-circle",
                     "description": "Base URL or local path to location of pipeline test dataset files",
-                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/3ba0ba7174a5667fc2e005430594ffb063f986c7/",
+                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/",
                     "hidden": true
                 },
                 "trace_report_suffix": {
diff --git a/subworkflows/local/cluster/tests/main.nf.test b/subworkflows/local/cluster/tests/main.nf.test
index 7bf52994..34d39524 100644
--- a/subworkflows/local/cluster/tests/main.nf.test
+++ b/subworkflows/local/cluster/tests/main.nf.test
@@ -292,7 +292,7 @@ nextflow_workflow {
                 input[1] = false
                 input[2] = true
                 input[3] = ''
-                input[4] = [[integration: 'harmony', subset: null, resolution: null, analyses: null]]
+                input[4] = [[integration: 'symphony', subset: null, resolution: null, analyses: null]]
                 input[5] = ['0.5', '1']
                 input[6] = 'sample'
                 input[7] = 'X_scvi'
diff --git a/subworkflows/local/combine/main.nf b/subworkflows/local/combine/main.nf
index 80ec76c1..78c0d12a 100644
--- a/subworkflows/local/combine/main.nf
+++ b/subworkflows/local/combine/main.nf
@@ -17,7 +17,7 @@ workflow COMBINE {
     scvi_categorical_covariates //   value: string
     scvi_continuous_covariates  //   value: string
     scimilarity_model           //   value: string
-    harmony_reference           //   value: string
+    symphony_reference           //   value: string
     expimap_gmt                 //   value: string
     condition_col               //   value: string
     scib                        //   value: boolean
@@ -53,7 +53,7 @@ workflow COMBINE {
         scvi_categorical_covariates,
         scvi_continuous_covariates,
         scimilarity_model,
-        harmony_reference,
+        symphony_reference,
         expimap_gmt,
         condition_col
     )
diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf
index 7af3e357..9d3d7e5c 100644
--- a/subworkflows/local/integrate/main.nf
+++ b/subworkflows/local/integrate/main.nf
@@ -24,7 +24,7 @@ workflow INTEGRATE {
     scvi_categorical_covariates // list of string
     scvi_continuous_covariates  // list of string
     scimilarity_model           // path
-    harmony_reference           // path
+    symphony_reference           // path
     expimap_gmt                 // path
     condition_col               // string
 
@@ -113,11 +113,11 @@ workflow INTEGRATE {
         ch_obsm = ch_obsm.mix(SCVITOOLS_SCANVI.out.obsm)
     }
 
-    if (methods.contains('harmony')) {
-        if (harmony_reference) {
+    if (methods.contains('symphony')) {
+        if (symphony_reference) {
             SYMPHONY_MAPEMBEDDING (
-                ch_h5ad.map { _meta, h5ad -> [[id: 'harmony'], h5ad] },
-                channel.value([[id: 'harmony'], harmony_reference]),
+                ch_h5ad.map { _meta, h5ad -> [[id: 'symphony'], h5ad] },
+                channel.value([[id: 'symphony'], symphony_reference]),
                 "batch",
                 "X"
             )
@@ -127,7 +127,7 @@ workflow INTEGRATE {
         }
         else {
             SYMPHONY_HARMONYINTEGRATE (
-                ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'harmony'], h5ad] },
+                ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] },
                 "batch",
                 "X"
             )
diff --git a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test
index e01cd89a..9ee96c5d 100644
--- a/subworkflows/local/integrate/tests/main.nf.test
+++ b/subworkflows/local/integrate/tests/main.nf.test
@@ -7,7 +7,7 @@ nextflow_workflow {
     tag "subworkflows"
     tag "subworkflows_local"
 
-    test("Should run without failures - harmony - stub") {
+    test("Should run without failures - symphony - stub") {
 
         options '-stub'
 
@@ -24,7 +24,7 @@ nextflow_workflow {
                 input[1] = false
                 input[2] = 2000
                 input[3] = []
-                input[4] = ['harmony']
+                input[4] = ['symphony']
                 input[5] = null
                 input[6] = null
                 input[7] = []
@@ -44,7 +44,7 @@ nextflow_workflow {
 
     }
 
-    test("Should run without failures - harmony") {
+    test("Should run without failures - symphony") {
 
         when {
             params {
@@ -59,7 +59,7 @@ nextflow_workflow {
                 input[1] = false
                 input[2] = 2000
                 input[3] = []
-                input[4] = ['harmony']
+                input[4] = ['symphony']
                 input[5] = null
                 input[6] = null
                 input[7] = []
@@ -260,7 +260,7 @@ nextflow_workflow {
                 input[1] = true
                 input[2] = -1
                 input[3] = []
-                input[4] = ['harmony']
+                input[4] = ['symphony']
                 input[5] = null
                 input[6] = null
                 input[7] = []
diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
index 5f4ba6eb..9e0cff62 100644
--- a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
@@ -181,8 +181,8 @@ def validateInputParameters() {
     }
 
     def integration_methods = params.integration_methods.split(',').collect { it -> it.trim().toLowerCase() }
-    if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity', 'harmony']).size() > 0) {
-        throw new Exception("Only scvi, scanvi, scimilarity and harmony integration methods are supported if base_adata is provided")
+    if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity', 'symphony']).size() > 0) {
+        throw new Exception("Only scvi, scanvi, scimilarity and symphony integration methods are supported if base_adata is provided")
     }
 
     if (params.base_adata && 'scvi' in integration_methods && !params.scvi_model) {
@@ -197,8 +197,8 @@ def validateInputParameters() {
         throw new Exception("If base_adata is provided and scimilarity is used as integration method, scimilarity_model must be provided.")
     }
 
-    if (params.base_adata && 'harmony' in integration_methods && !params.harmony_reference) {
-        throw new Exception("If base_adata is provided and harmony is used as integration method, harmony_reference must be provided.")
+    if (params.base_adata && 'symphony' in integration_methods && !params.symphony_reference) {
+        throw new Exception("If base_adata is provided and symphony is used as integration method, symphony_reference must be provided.")
     }
 
     // Validate sample_n and sample_fraction parameters
diff --git a/tests/main_pipeline_build.nf.test b/tests/main_pipeline_build.nf.test
index f07b1ebb..1932054a 100644
--- a/tests/main_pipeline_build.nf.test
+++ b/tests/main_pipeline_build.nf.test
@@ -9,7 +9,7 @@ nextflow_pipeline {
         when {
             params {
                 input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet.csv'
-                integration_methods = 'scvi,harmony,bbknn,combat,seurat'
+                integration_methods = 'scvi,symphony,bbknn,combat,seurat'
                 doublet_detection   = 'scrublet,scdblfinder'
                 celltypist_model    = 'Adult_COVID19_PBMC'
                 integration_hvgs    = 500
diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf
index 2528f2e7..3c99deae 100644
--- a/workflows/scdownstream.nf
+++ b/workflows/scdownstream.nf
@@ -59,7 +59,7 @@ workflow SCDOWNSTREAM {
     scvi_categorical_covariates   //   value: string
     scvi_continuous_covariates    //   value: string
     scimilarity_model             //   value: string
-    harmony_reference             //   value: string
+    symphony_reference             //   value: string
     expimap_gmt                   //   value: string
     skip_liana                    //   value: boolean
     skip_rankgenesgroups          //   value: boolean
@@ -179,7 +179,7 @@ workflow SCDOWNSTREAM {
                 scvi_categorical_covariates,
                 scvi_continuous_covariates,
                 scimilarity_model,
-                harmony_reference,
+                symphony_reference,
                 expimap_gmt,
                 condition_col,
                 scib,

From 1da576927061e24308cbb1dde2d07ca3dd293640 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Fri, 29 May 2026 19:23:12 +0200
Subject: [PATCH 08/19] Point pipeline tests at official nf-core test-datasets.

Use the upstream extension_base commit for reference mapping, extension, and sub-atlas tests instead of branch URLs or the temporary fork.
---
 nextflow.config                               |  2 +-
 tests/main_pipeline_extend.nf.test            | 13 +++++++------
 tests/main_pipeline_reference_mapping.nf.test | 11 ++++++-----
 tests/main_pipeline_sub.nf.test               |  5 +++--
 tests/nextflow.config                         |  2 +-
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index b94a1c3d..9e612096 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -113,7 +113,7 @@ params {
     help_full                    = false
     show_hidden                  = false
     version                      = false
-    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/3ba0ba7174a5667fc2e005430594ffb063f986c7/'
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
     trace_report_suffix          = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
     // Config options
     config_profile_name           = null
diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test
index 365d4b30..d549b821 100644
--- a/tests/main_pipeline_extend.nf.test
+++ b/tests/main_pipeline_extend.nf.test
@@ -4,19 +4,20 @@ nextflow_pipeline {
     script "main.nf"
     tag "pipeline"
 
-    test("Should perform scvi and harmony reference extension") {
+    test("Should perform scvi and symphony reference extension") {
 
         when {
             params {
-                input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv'
-                integration_methods = 'scvi,harmony'
+                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
+                input = pipelines_testdata_base_path + 'samplesheet_single.csv'
+                integration_methods = 'scvi,symphony'
                 doublet_detection   = 'scrublet,scdblfinder'
                 celltypist_model    = 'Adult_COVID19_PBMC'
                 integration_hvgs    = 500
                 outdir              = "$outputDir"
-                scvi_model          = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt'
-                harmony_reference   = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad'
-                base_adata          = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/merged.h5ad'
+                scvi_model          = pipelines_testdata_base_path + 'extension_base/model.pt'
+                symphony_reference   = pipelines_testdata_base_path + 'extension_base/symphony_reference.h5ad'
+                base_adata          = pipelines_testdata_base_path + 'extension_base/merged.h5ad'
             }
         }
 
diff --git a/tests/main_pipeline_reference_mapping.nf.test b/tests/main_pipeline_reference_mapping.nf.test
index 93d1b10f..fdaa3411 100644
--- a/tests/main_pipeline_reference_mapping.nf.test
+++ b/tests/main_pipeline_reference_mapping.nf.test
@@ -4,18 +4,19 @@ nextflow_pipeline {
     script "main.nf"
     tag "pipeline"
 
-    test("Should perform scvi and harmony reference mapping") {
+    test("Should perform scvi and symphony reference mapping") {
 
         when {
             params {
-                input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv'
-                integration_methods = 'scvi,harmony'
+                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
+                input = pipelines_testdata_base_path + 'samplesheet_single.csv'
+                integration_methods = 'scvi,symphony'
                 doublet_detection   = 'scrublet,scdblfinder'
                 celltypist_model    = 'Adult_COVID19_PBMC'
                 integration_hvgs    = 500
                 outdir              = "$outputDir"
-                scvi_model          = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt'
-                harmony_reference   = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad'
+                scvi_model          = pipelines_testdata_base_path + 'extension_base/model.pt'
+                symphony_reference   = pipelines_testdata_base_path + 'extension_base/symphony_reference.h5ad'
             }
         }
 
diff --git a/tests/main_pipeline_sub.nf.test b/tests/main_pipeline_sub.nf.test
index 8ede02e5..5bfb2b79 100644
--- a/tests/main_pipeline_sub.nf.test
+++ b/tests/main_pipeline_sub.nf.test
@@ -8,10 +8,11 @@ nextflow_pipeline {
 
         when {
             params {
+                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
                 outdir              = "$outputDir"
                 input               = null
-                base_adata          = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/merged.h5ad'
-                base_embeddings     = 'combat,harmony,scvi'
+                base_adata          = pipelines_testdata_base_path + 'extension_base/merged.h5ad'
+                base_embeddings     = 'symphony'
                 cluster_global      = false
                 cluster_per_label   = true
                 base_label_col      = 'sample'
diff --git a/tests/nextflow.config b/tests/nextflow.config
index aa29d8ba..71b5b6be 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -8,7 +8,7 @@
 // Or any resources requirements
 params {
     modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
-    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085'
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
     // CyteType is slow (remote LLM API); module nf-tests cover it — keep off in pipeline/subworkflow nf-tests
     cytetype_study_context = ''
 }

From e3f2923666beb0a46ab1beddafccaddc443ef5d5 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Fri, 29 May 2026 19:23:20 +0200
Subject: [PATCH 09/19] Refresh nf-test snapshots after symphony rename and
 test-data update.

Regenerated module, subworkflow, and pipeline snapshots with real H5AD outputs from the new official test datasets.
---
 .../prepcellxgene/tests/main.nf.test.snap     | 212 +++++++++++++++---
 .../splitembeddings/tests/main.nf.test.snap   | 205 ++++++++++++++---
 .../mapembedding/tests/main.nf.test.snap      |  14 +-
 .../local/integrate/tests/main.nf.test.snap   | 184 +++++++--------
 tests/default.nf.test.snap                    |  34 +--
 tests/main_pipeline_build.nf.test.snap        |  34 +--
 tests/main_pipeline_extend.nf.test.snap       |  24 +-
 ...in_pipeline_reference_mapping.nf.test.snap |  24 +-
 tests/main_pipeline_sub.nf.test.snap          |  70 ++----
 9 files changed, 541 insertions(+), 260 deletions(-)

diff --git a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap
index 58b78d2a..e2982530 100644
--- a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap
+++ b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap
@@ -40,7 +40,7 @@
                         {
                             "id": "test"
                         },
-                        "test.h5ad:md5,e962f73664186924dfe5269caed069bb"
+                        "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3"
                     ]
                 ],
                 "1": [
@@ -51,7 +51,7 @@
                         {
                             "id": "test"
                         },
-                        "test.h5ad:md5,e962f73664186924dfe5269caed069bb"
+                        "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3"
                     ]
                 ],
                 "versions": [
@@ -67,40 +67,193 @@
                 }
             },
             {
-                "n_obs": 23364,
+                "n_obs": 32135,
                 "n_vars": 9887,
                 "obs": {
                     "index": "_index",
                     "columns": [
+                        "G2M_score",
+                        "S_score",
                         "batch",
                         "bbknn-global-0.5:entropy",
                         "bbknn-global-0.5_leiden",
                         "bbknn-global-1.0:entropy",
                         "bbknn-global-1.0_leiden",
-                        "celltypist:Adult_Human_Skin",
-                        "celltypist:Adult_Human_Skin:conf",
+                        "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct",
+                        "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct",
+                        "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells",
+                        "celltypist:Adult_COVID19_PBMC",
+                        "celltypist:Adult_COVID19_PBMC:conf",
                         "combat-global-0.5:entropy",
                         "combat-global-0.5_leiden",
                         "combat-global-1.0:entropy",
                         "combat-global-1.0_leiden",
                         "condition",
-                        "harmony-global-0.5:entropy",
-                        "harmony-global-0.5_leiden",
-                        "harmony-global-1.0:entropy",
-                        "harmony-global-1.0_leiden",
+                        "hpca_celldex.tar.delta.next_hpca_celldex",
+                        "hpca_celldex.tar.labels_hpca_celldex",
+                        "hpca_celldex.tar.pruned.labels_hpca_celldex",
+                        "hpca_celldex.tar.scores_hpca_celldex.Astrocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.BM",
+                        "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.",
+                        "hpca_celldex.tar.scores_hpca_celldex.B_cell",
+                        "hpca_celldex.tar.scores_hpca_celldex.CMP",
+                        "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.DC",
+                        "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Erythroblast",
+                        "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts",
+                        "hpca_celldex.tar.scores_hpca_celldex.GMP",
+                        "hpca_celldex.tar.scores_hpca_celldex.Gametocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF",
+                        "hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.",
+                        "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.MEP",
+                        "hpca_celldex.tar.scores_hpca_celldex.MSC",
+                        "hpca_celldex.tar.scores_hpca_celldex.Macrophage",
+                        "hpca_celldex.tar.scores_hpca_celldex.Monocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.Myelocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.NK_cell",
+                        "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell",
+                        "hpca_celldex.tar.scores_hpca_celldex.Neurons",
+                        "hpca_celldex.tar.scores_hpca_celldex.Neutrophils",
+                        "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts",
+                        "hpca_celldex.tar.scores_hpca_celldex.Platelets",
+                        "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.",
+                        "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.",
+                        "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.T_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.iPS_cells",
+                        "immune_celldex.tar.delta.next_immune_celldex",
+                        "immune_celldex.tar.labels_immune_celldex",
+                        "immune_celldex.tar.pruned.labels_immune_celldex",
+                        "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Classical.monocytes",
+                        "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes",
+                        "immune_celldex.tar.scores_immune_celldex.Low.density.basophils",
+                        "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils",
+                        "immune_celldex.tar.scores_immune_celldex.MAIT.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Naive.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes",
+                        "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Plasmablasts",
+                        "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Progenitor.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th1.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th17.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th2.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells",
                         "label",
                         "n_counts",
                         "n_genes",
                         "n_genes_by_counts",
+                        "pct_counts_hb",
                         "pct_counts_mt",
+                        "pct_counts_ribo",
+                        "phase",
                         "sample",
                         "sample_original",
                         "scvi-global-0.5:entropy",
                         "scvi-global-0.5_leiden",
                         "scvi-global-1.0:entropy",
                         "scvi-global-1.0_leiden",
+                        "seurat-global-0.5:entropy",
+                        "seurat-global-0.5_leiden",
+                        "seurat-global-1.0:entropy",
+                        "seurat-global-1.0_leiden",
+                        "symphony-global-0.5:entropy",
+                        "symphony-global-0.5_leiden",
+                        "symphony-global-1.0:entropy",
+                        "symphony-global-1.0_leiden",
                         "total_counts",
-                        "total_counts_mt"
+                        "total_counts_hb",
+                        "total_counts_mt",
+                        "total_counts_ribo"
                     ]
                 },
                 "var": {
@@ -115,11 +268,12 @@
                 "obsm": [
                     "X_bbknn-global_umap",
                     "X_combat-global_umap",
-                    "X_harmony-global_umap",
                     "X_scvi-global_umap",
+                    "X_seurat-global_umap",
+                    "X_symphony-global_umap",
                     "combat",
-                    "harmony",
-                    "scvi"
+                    "scvi",
+                    "symphony"
                 ],
                 "varm": [
                     
@@ -131,32 +285,34 @@
                     
                 ],
                 "uns": [
-                    "bbknn-global-0.5_characteristic_genes",
+                    "bbknn-global-0.5_leiden_characteristic_genes",
                     "bbknn-global-0.5_paga",
-                    "bbknn-global-1.0_characteristic_genes",
+                    "bbknn-global-1.0_leiden_characteristic_genes",
                     "bbknn-global-1.0_paga",
-                    "combat-global-0.5_characteristic_genes",
-                    "combat-global-0.5_liana",
+                    "combat-global-0.5_leiden_characteristic_genes",
                     "combat-global-0.5_paga",
-                    "combat-global-1.0_characteristic_genes",
-                    "combat-global-1.0_liana",
+                    "combat-global-1.0_leiden_characteristic_genes",
                     "combat-global-1.0_paga",
-                    "harmony-global-0.5_characteristic_genes",
-                    "harmony-global-0.5_paga",
-                    "harmony-global-1.0_characteristic_genes",
-                    "harmony-global-1.0_paga",
                     "log1p",
-                    "scvi-global-0.5_characteristic_genes",
+                    "scvi-global-0.5_leiden_characteristic_genes",
                     "scvi-global-0.5_paga",
-                    "scvi-global-1.0_characteristic_genes",
-                    "scvi-global-1.0_paga"
+                    "scvi-global-1.0_leiden_characteristic_genes",
+                    "scvi-global-1.0_paga",
+                    "seurat-global-0.5_leiden_characteristic_genes",
+                    "seurat-global-0.5_paga",
+                    "seurat-global-1.0_leiden_characteristic_genes",
+                    "seurat-global-1.0_paga",
+                    "symphony-global-0.5_leiden_characteristic_genes",
+                    "symphony-global-0.5_paga",
+                    "symphony-global-1.0_leiden_characteristic_genes",
+                    "symphony-global-1.0_paga"
                 ]
             }
         ],
-        "timestamp": "2026-03-29T12:57:46.020211425",
+        "timestamp": "2026-05-29T11:36:58.839746387",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
+            "nextflow": "26.04.0"
         }
     }
 }
\ No newline at end of file
diff --git a/modules/local/adata/splitembeddings/tests/main.nf.test.snap b/modules/local/adata/splitembeddings/tests/main.nf.test.snap
index 911f0c48..1da9eb04 100644
--- a/modules/local/adata/splitembeddings/tests/main.nf.test.snap
+++ b/modules/local/adata/splitembeddings/tests/main.nf.test.snap
@@ -40,7 +40,7 @@
                         {
                             "id": "test"
                         },
-                        "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e"
+                        "scvi.h5ad:md5,70a63fb030713420c635aab523701691"
                     ]
                 ],
                 "1": [
@@ -51,7 +51,7 @@
                         {
                             "id": "test"
                         },
-                        "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e"
+                        "scvi.h5ad:md5,70a63fb030713420c635aab523701691"
                     ]
                 ],
                 "versions": [
@@ -65,40 +65,193 @@
                 }
             },
             {
-                "n_obs": 23364,
+                "n_obs": 32135,
                 "n_vars": 9887,
                 "obs": {
                     "index": "_index",
                     "columns": [
+                        "G2M_score",
+                        "S_score",
                         "batch",
                         "bbknn-global-0.5:entropy",
                         "bbknn-global-0.5_leiden",
                         "bbknn-global-1.0:entropy",
                         "bbknn-global-1.0_leiden",
-                        "celltypist:Adult_Human_Skin",
-                        "celltypist:Adult_Human_Skin:conf",
+                        "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct",
+                        "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct",
+                        "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells",
+                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells",
+                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells",
+                        "celltypist:Adult_COVID19_PBMC",
+                        "celltypist:Adult_COVID19_PBMC:conf",
                         "combat-global-0.5:entropy",
                         "combat-global-0.5_leiden",
                         "combat-global-1.0:entropy",
                         "combat-global-1.0_leiden",
                         "condition",
-                        "harmony-global-0.5:entropy",
-                        "harmony-global-0.5_leiden",
-                        "harmony-global-1.0:entropy",
-                        "harmony-global-1.0_leiden",
+                        "hpca_celldex.tar.delta.next_hpca_celldex",
+                        "hpca_celldex.tar.labels_hpca_celldex",
+                        "hpca_celldex.tar.pruned.labels_hpca_celldex",
+                        "hpca_celldex.tar.scores_hpca_celldex.Astrocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.BM",
+                        "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.",
+                        "hpca_celldex.tar.scores_hpca_celldex.B_cell",
+                        "hpca_celldex.tar.scores_hpca_celldex.CMP",
+                        "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.DC",
+                        "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Erythroblast",
+                        "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts",
+                        "hpca_celldex.tar.scores_hpca_celldex.GMP",
+                        "hpca_celldex.tar.scores_hpca_celldex.Gametocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF",
+                        "hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.",
+                        "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes",
+                        "hpca_celldex.tar.scores_hpca_celldex.MEP",
+                        "hpca_celldex.tar.scores_hpca_celldex.MSC",
+                        "hpca_celldex.tar.scores_hpca_celldex.Macrophage",
+                        "hpca_celldex.tar.scores_hpca_celldex.Monocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.Myelocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.NK_cell",
+                        "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell",
+                        "hpca_celldex.tar.scores_hpca_celldex.Neurons",
+                        "hpca_celldex.tar.scores_hpca_celldex.Neutrophils",
+                        "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts",
+                        "hpca_celldex.tar.scores_hpca_celldex.Platelets",
+                        "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.",
+                        "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.",
+                        "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte",
+                        "hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.T_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells",
+                        "hpca_celldex.tar.scores_hpca_celldex.iPS_cells",
+                        "immune_celldex.tar.delta.next_immune_celldex",
+                        "immune_celldex.tar.labels_immune_celldex",
+                        "immune_celldex.tar.pruned.labels_immune_celldex",
+                        "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Classical.monocytes",
+                        "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes",
+                        "immune_celldex.tar.scores_immune_celldex.Low.density.basophils",
+                        "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils",
+                        "immune_celldex.tar.scores_immune_celldex.MAIT.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Naive.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes",
+                        "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Plasmablasts",
+                        "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Progenitor.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells",
+                        "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th1.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th17.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Th2.cells",
+                        "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells",
                         "label",
                         "n_counts",
                         "n_genes",
                         "n_genes_by_counts",
+                        "pct_counts_hb",
                         "pct_counts_mt",
+                        "pct_counts_ribo",
+                        "phase",
                         "sample",
                         "sample_original",
                         "scvi-global-0.5:entropy",
                         "scvi-global-0.5_leiden",
                         "scvi-global-1.0:entropy",
                         "scvi-global-1.0_leiden",
+                        "seurat-global-0.5:entropy",
+                        "seurat-global-0.5_leiden",
+                        "seurat-global-1.0:entropy",
+                        "seurat-global-1.0_leiden",
+                        "symphony-global-0.5:entropy",
+                        "symphony-global-0.5_leiden",
+                        "symphony-global-1.0:entropy",
+                        "symphony-global-1.0_leiden",
                         "total_counts",
-                        "total_counts_mt"
+                        "total_counts_hb",
+                        "total_counts_mt",
+                        "total_counts_ribo"
                     ]
                 },
                 "var": {
@@ -123,31 +276,33 @@
                     
                 ],
                 "uns": [
-                    "bbknn-global-0.5_characteristic_genes",
+                    "bbknn-global-0.5_leiden_characteristic_genes",
                     "bbknn-global-0.5_paga",
-                    "bbknn-global-1.0_characteristic_genes",
+                    "bbknn-global-1.0_leiden_characteristic_genes",
                     "bbknn-global-1.0_paga",
-                    "combat-global-0.5_characteristic_genes",
-                    "combat-global-0.5_liana",
+                    "combat-global-0.5_leiden_characteristic_genes",
                     "combat-global-0.5_paga",
-                    "combat-global-1.0_characteristic_genes",
-                    "combat-global-1.0_liana",
+                    "combat-global-1.0_leiden_characteristic_genes",
                     "combat-global-1.0_paga",
-                    "harmony-global-0.5_characteristic_genes",
-                    "harmony-global-0.5_paga",
-                    "harmony-global-1.0_characteristic_genes",
-                    "harmony-global-1.0_paga",
-                    "scvi-global-0.5_characteristic_genes",
+                    "scvi-global-0.5_leiden_characteristic_genes",
                     "scvi-global-0.5_paga",
-                    "scvi-global-1.0_characteristic_genes",
-                    "scvi-global-1.0_paga"
+                    "scvi-global-1.0_leiden_characteristic_genes",
+                    "scvi-global-1.0_paga",
+                    "seurat-global-0.5_leiden_characteristic_genes",
+                    "seurat-global-0.5_paga",
+                    "seurat-global-1.0_leiden_characteristic_genes",
+                    "seurat-global-1.0_paga",
+                    "symphony-global-0.5_leiden_characteristic_genes",
+                    "symphony-global-0.5_paga",
+                    "symphony-global-1.0_leiden_characteristic_genes",
+                    "symphony-global-1.0_paga"
                 ]
             }
         ],
-        "timestamp": "2026-03-29T14:55:42.179483745",
+        "timestamp": "2026-05-29T11:36:58.752520992",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
+            "nextflow": "26.04.0"
         }
     }
 }
\ No newline at end of file
diff --git a/modules/local/symphony/mapembedding/tests/main.nf.test.snap b/modules/local/symphony/mapembedding/tests/main.nf.test.snap
index f46df979..5588ee0b 100644
--- a/modules/local/symphony/mapembedding/tests/main.nf.test.snap
+++ b/modules/local/symphony/mapembedding/tests/main.nf.test.snap
@@ -5,13 +5,13 @@
                 "0": [
                     [
                         {
-                            "id": "harmony"
+                            "id": "symphony"
                         },
-                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "1": [
-                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "2": [
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
@@ -19,20 +19,20 @@
                 "h5ad": [
                     [
                         {
-                            "id": "harmony"
+                            "id": "symphony"
                         },
-                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "obsm": [
-                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "versions": [
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-05-28T08:08:04.937392964",
+        "timestamp": "2026-05-28T16:31:40.664029252",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
diff --git a/subworkflows/local/integrate/tests/main.nf.test.snap b/subworkflows/local/integrate/tests/main.nf.test.snap
index 17d238af..14315dc9 100644
--- a/subworkflows/local/integrate/tests/main.nf.test.snap
+++ b/subworkflows/local/integrate/tests/main.nf.test.snap
@@ -1,59 +1,4 @@
 {
-    "Should run without failures - harmony - stub": {
-        "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "harmony"
-                        },
-                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    
-                ],
-                "2": [
-                    
-                ],
-                "3": [
-                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "4": [
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "integrations": [
-                    [
-                        {
-                            "id": "harmony"
-                        },
-                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "obs": [
-                    
-                ],
-                "obsm": [
-                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "var": [
-                    
-                ],
-                "versions": [
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ]
-            }
-        ],
-        "timestamp": "2026-03-28T23:05:37.694952307",
-        "meta": {
-            "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
-        }
-    },
     "Should run without failures - bbknn - stub": {
         "content": [
             {
@@ -239,15 +184,15 @@
             "nextflow": "25.10.2"
         }
     },
-    "Should run without failures - extension mode - stub": {
+    "Should run without failures - combat - stub": {
         "content": [
             {
                 "0": [
                     [
                         {
-                            "id": "harmony"
+                            "id": "combat"
                         },
-                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "1": [
@@ -257,44 +202,48 @@
                     
                 ],
                 "3": [
-                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "integrations": [
                     [
                         {
-                            "id": "harmony"
+                            "id": "combat"
                         },
-                        "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "obs": [
                     
                 ],
                 "obsm": [
-                    "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "var": [
                     
                 ],
                 "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ]
             }
         ],
-        "timestamp": "2026-03-28T23:09:49.392744851",
+        "timestamp": "2026-03-25T15:49:26.334091777",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.2"
         }
     },
-    "Should run without failures - pca": {
+    "Should run without failures - bbknn": {
         "content": [
             [
                 "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae",
-                "versions.yml:md5,87a2cb96724430656d9c1276e91e0208",
+                "versions.yml:md5,ccf730637c4c61a84ac4a002bf9832e0",
                 "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823"
             ],
             {
@@ -339,39 +288,41 @@
                     "counts"
                 ],
                 "obsm": [
-                    "X_emb"
+                    "X_pca"
                 ],
                 "varm": [
-                    "X_emb"
+                    "PCs"
                 ],
                 "obsp": [
-                    
+                    "connectivities",
+                    "distances"
                 ],
                 "varp": [
                     
                 ],
                 "uns": [
-                    "X_emb",
                     "hvg",
-                    "log1p"
+                    "log1p",
+                    "neighbors",
+                    "pca"
                 ]
             }
         ],
-        "timestamp": "2026-05-28T14:04:36.10115423",
+        "timestamp": "2026-05-28T14:01:44.359301169",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
         }
     },
-    "Should run without failures - combat - stub": {
+    "Should run without failures - symphony - stub": {
         "content": [
             {
                 "0": [
                     [
                         {
-                            "id": "combat"
+                            "id": "symphony"
                         },
-                        "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "1": [
@@ -381,7 +332,7 @@
                     
                 ],
                 "3": [
-                    "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "4": [
                     "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e",
@@ -391,16 +342,16 @@
                 "integrations": [
                     [
                         {
-                            "id": "combat"
+                            "id": "symphony"
                         },
-                        "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
                     ]
                 ],
                 "obs": [
                     
                 ],
                 "obsm": [
-                    "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
                 ],
                 "var": [
                     
@@ -412,13 +363,13 @@
                 ]
             }
         ],
-        "timestamp": "2026-03-25T15:49:26.334091777",
+        "timestamp": "2026-05-28T16:31:59.971834646",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
+            "nextflow": "26.04.0"
         }
     },
-    "Should run without failures - harmony": {
+    "Should run without failures - symphony": {
         "content": [
             [
                 "versions.yml:md5,0941a4daea5c41d9e3259be11e9f2263",
@@ -485,17 +436,68 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-28T14:18:52.042984469",
+        "timestamp": "2026-05-28T16:32:53.466053531",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
         }
     },
-    "Should run without failures - bbknn": {
+    "Should run without failures - extension mode - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "symphony"
+                        },
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    
+                ],
+                "3": [
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "4": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "integrations": [
+                    [
+                        {
+                            "id": "symphony"
+                        },
+                        "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "obs": [
+                    
+                ],
+                "obsm": [
+                    "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "var": [
+                    
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            }
+        ],
+        "timestamp": "2026-05-28T16:35:48.236148467",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.0"
+        }
+    },
+    "Should run without failures - pca": {
         "content": [
             [
                 "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae",
-                "versions.yml:md5,ccf730637c4c61a84ac4a002bf9832e0",
+                "versions.yml:md5,87a2cb96724430656d9c1276e91e0208",
                 "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823"
             ],
             {
@@ -540,27 +542,25 @@
                     "counts"
                 ],
                 "obsm": [
-                    "X_pca"
+                    "X_emb"
                 ],
                 "varm": [
-                    "PCs"
+                    "X_emb"
                 ],
                 "obsp": [
-                    "connectivities",
-                    "distances"
+                    
                 ],
                 "varp": [
                     
                 ],
                 "uns": [
+                    "X_emb",
                     "hvg",
-                    "log1p",
-                    "neighbors",
-                    "pca"
+                    "log1p"
                 ]
             }
         ],
-        "timestamp": "2026-05-28T14:01:44.359301169",
+        "timestamp": "2026-05-28T14:04:36.10115423",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap
index eda1a9db..5fc73379 100644
--- a/tests/default.nf.test.snap
+++ b/tests/default.nf.test.snap
@@ -135,12 +135,6 @@
                     "python": "3.13.12",
                     "scanpy": "1.12"
                 },
-                "SCANPY_HARMONY": {
-                    "harmonypy": "0.2.0",
-                    "pandas": "2.3.3",
-                    "python": "3.13.12",
-                    "scanpy": "1.12"
-                },
                 "SCANPY_HVGS": {
                     "python": "3.13.12",
                     "scanpy": "1.12"
@@ -172,6 +166,12 @@
                 "SCVITOOLS_SOLO": {
                     "scvi": "1.4.3"
                 },
+                "SYMPHONY_HARMONYINTEGRATE": {
+                    "pandas": "2.3.3",
+                    "python": "3.13.13",
+                    "scanpy": "1.12.1",
+                    "symphonypy": "0.2.4"
+                },
                 "UMAP": {
                     "pandas": "2.3.3",
                     "python": "3.13.12",
@@ -237,14 +237,6 @@
                 "cluster_dimred/combat/leiden/combat-global-0.5_leiden.png",
                 "cluster_dimred/combat/leiden/combat-global-1.0_leiden.png",
                 "cluster_dimred/combat/umap",
-                "cluster_dimred/harmony",
-                "cluster_dimred/harmony/entropy",
-                "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png",
-                "cluster_dimred/harmony/leiden",
-                "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png",
-                "cluster_dimred/harmony/umap",
                 "cluster_dimred/scvi",
                 "cluster_dimred/scvi/entropy",
                 "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png",
@@ -253,15 +245,25 @@
                 "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png",
                 "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png",
                 "cluster_dimred/scvi/umap",
+                "cluster_dimred/symphony",
+                "cluster_dimred/symphony/entropy",
+                "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png",
+                "cluster_dimred/symphony/leiden",
+                "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png",
+                "cluster_dimred/symphony/umap",
                 "combine",
                 "combine/integrate",
                 "combine/integrate/scib_metrics",
                 "combine/integrate/scib_metrics/combat_metrics.tsv",
-                "combine/integrate/scib_metrics/harmony_metrics.tsv",
                 "combine/integrate/scib_metrics/scvi_metrics.tsv",
+                "combine/integrate/scib_metrics/symphony_metrics.tsv",
                 "combine/integrate/scvi",
                 "combine/integrate/scvi/scvi_model",
                 "combine/integrate/scvi/scvi_model/model.pt",
+                "combine/integrate/symphony",
+                "combine/integrate/symphony/symphony_reference.h5ad",
                 "combine/merge",
                 "combine/merge/upset_genes.png",
                 "finalized",
@@ -305,7 +307,7 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-11T22:49:53.648324922",
+        "timestamp": "2026-05-28T17:09:18.020969587",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
diff --git a/tests/main_pipeline_build.nf.test.snap b/tests/main_pipeline_build.nf.test.snap
index 02fca1ba..77005b00 100644
--- a/tests/main_pipeline_build.nf.test.snap
+++ b/tests/main_pipeline_build.nf.test.snap
@@ -135,12 +135,6 @@
                     "python": "3.13.12",
                     "scanpy": "1.12"
                 },
-                "SCANPY_HARMONY": {
-                    "harmonypy": "0.2.0",
-                    "pandas": "2.3.3",
-                    "python": "3.13.12",
-                    "scanpy": "1.12"
-                },
                 "SCANPY_HVGS": {
                     "python": "3.13.12",
                     "scanpy": "1.12"
@@ -174,6 +168,12 @@
                     "Seurat": "5.4.0",
                     "anndataR": "1.0.2"
                 },
+                "SYMPHONY_HARMONYINTEGRATE": {
+                    "pandas": "2.3.3",
+                    "python": "3.13.13",
+                    "scanpy": "1.12.1",
+                    "symphonypy": "0.2.4"
+                },
                 "UMAP": {
                     "pandas": "2.3.3",
                     "python": "3.13.12",
@@ -239,14 +239,6 @@
                 "cluster_dimred/combat/leiden/combat-global-0.5_leiden.png",
                 "cluster_dimred/combat/leiden/combat-global-1.0_leiden.png",
                 "cluster_dimred/combat/umap",
-                "cluster_dimred/harmony",
-                "cluster_dimred/harmony/entropy",
-                "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png",
-                "cluster_dimred/harmony/leiden",
-                "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png",
-                "cluster_dimred/harmony/umap",
                 "cluster_dimred/scvi",
                 "cluster_dimred/scvi/entropy",
                 "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png",
@@ -263,16 +255,26 @@
                 "cluster_dimred/seurat/leiden/seurat-global-0.5_leiden.png",
                 "cluster_dimred/seurat/leiden/seurat-global-1.0_leiden.png",
                 "cluster_dimred/seurat/umap",
+                "cluster_dimred/symphony",
+                "cluster_dimred/symphony/entropy",
+                "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png",
+                "cluster_dimred/symphony/leiden",
+                "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png",
+                "cluster_dimred/symphony/umap",
                 "combine",
                 "combine/integrate",
                 "combine/integrate/scib_metrics",
                 "combine/integrate/scib_metrics/combat_metrics.tsv",
-                "combine/integrate/scib_metrics/harmony_metrics.tsv",
                 "combine/integrate/scib_metrics/scvi_metrics.tsv",
                 "combine/integrate/scib_metrics/seurat_metrics.tsv",
+                "combine/integrate/scib_metrics/symphony_metrics.tsv",
                 "combine/integrate/scvi",
                 "combine/integrate/scvi/scvi_model",
                 "combine/integrate/scvi/scvi_model/model.pt",
+                "combine/integrate/symphony",
+                "combine/integrate/symphony/symphony_reference.h5ad",
                 "combine/merge",
                 "combine/merge/upset_genes.png",
                 "finalized",
@@ -316,7 +318,7 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-11T23:09:11.341514791",
+        "timestamp": "2026-05-28T17:30:10.040067606",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
diff --git a/tests/main_pipeline_extend.nf.test.snap b/tests/main_pipeline_extend.nf.test.snap
index add7d274..400a7ed3 100644
--- a/tests/main_pipeline_extend.nf.test.snap
+++ b/tests/main_pipeline_extend.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "Should perform scvi and harmony reference extension": {
+    "Should perform scvi and symphony reference extension": {
         "content": [
             {
                 "ADATA_EXTEND": {
@@ -181,14 +181,6 @@
                 "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf",
                 "celltypes/singler/SRR28679759_singler_predictions.csv",
                 "cluster_dimred",
-                "cluster_dimred/harmony",
-                "cluster_dimred/harmony/entropy",
-                "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png",
-                "cluster_dimred/harmony/leiden",
-                "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png",
-                "cluster_dimred/harmony/umap",
                 "cluster_dimred/scvi",
                 "cluster_dimred/scvi/entropy",
                 "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png",
@@ -197,11 +189,19 @@
                 "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png",
                 "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png",
                 "cluster_dimred/scvi/umap",
+                "cluster_dimred/symphony",
+                "cluster_dimred/symphony/entropy",
+                "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png",
+                "cluster_dimred/symphony/leiden",
+                "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png",
+                "cluster_dimred/symphony/umap",
                 "combine",
                 "combine/integrate",
                 "combine/integrate/scib_metrics",
-                "combine/integrate/scib_metrics/harmony_metrics.tsv",
                 "combine/integrate/scib_metrics/scvi_metrics.tsv",
+                "combine/integrate/scib_metrics/symphony_metrics.tsv",
                 "combine/integrate/scvi",
                 "combine/integrate/scvi/scvi_model",
                 "combine/integrate/scvi/scvi_model/model.pt",
@@ -243,10 +243,10 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-28T11:14:08.38204652",
+        "timestamp": "2026-05-29T14:41:14.623955124",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/main_pipeline_reference_mapping.nf.test.snap b/tests/main_pipeline_reference_mapping.nf.test.snap
index 8353f4a6..e8795e97 100644
--- a/tests/main_pipeline_reference_mapping.nf.test.snap
+++ b/tests/main_pipeline_reference_mapping.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "Should perform scvi and harmony reference mapping": {
+    "Should perform scvi and symphony reference mapping": {
         "content": [
             {
                 "ADATA_EXTEND": {
@@ -181,14 +181,6 @@
                 "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf",
                 "celltypes/singler/SRR28679759_singler_predictions.csv",
                 "cluster_dimred",
-                "cluster_dimred/harmony",
-                "cluster_dimred/harmony/entropy",
-                "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png",
-                "cluster_dimred/harmony/leiden",
-                "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png",
-                "cluster_dimred/harmony/umap",
                 "cluster_dimred/scvi",
                 "cluster_dimred/scvi/entropy",
                 "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png",
@@ -197,11 +189,19 @@
                 "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png",
                 "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png",
                 "cluster_dimred/scvi/umap",
+                "cluster_dimred/symphony",
+                "cluster_dimred/symphony/entropy",
+                "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png",
+                "cluster_dimred/symphony/leiden",
+                "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png",
+                "cluster_dimred/symphony/umap",
                 "combine",
                 "combine/integrate",
                 "combine/integrate/scib_metrics",
-                "combine/integrate/scib_metrics/harmony_metrics.tsv",
                 "combine/integrate/scib_metrics/scvi_metrics.tsv",
+                "combine/integrate/scib_metrics/symphony_metrics.tsv",
                 "combine/integrate/scvi",
                 "combine/integrate/scvi/scvi_model",
                 "combine/integrate/scvi/scvi_model/model.pt",
@@ -243,10 +243,10 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-28T10:56:43.387329548",
+        "timestamp": "2026-05-29T14:31:02.484302876",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
         }
     }
-}
+}
\ No newline at end of file
diff --git a/tests/main_pipeline_sub.nf.test.snap b/tests/main_pipeline_sub.nf.test.snap
index 36b85745..cb74498d 100644
--- a/tests/main_pipeline_sub.nf.test.snap
+++ b/tests/main_pipeline_sub.nf.test.snap
@@ -59,58 +59,24 @@
             },
             [
                 "adata",
-                "adata/combat.h5ad",
-                "adata/harmony.h5ad",
-                "adata/scvi.h5ad",
+                "adata/symphony.h5ad",
                 "cluster_dimred",
-                "cluster_dimred/combat",
-                "cluster_dimred/combat/entropy",
-                "cluster_dimred/combat/entropy/combat-SRR28679756-0.5_entropy.png",
-                "cluster_dimred/combat/entropy/combat-SRR28679756-1.0_entropy.png",
-                "cluster_dimred/combat/entropy/combat-SRR28679757-0.5_entropy.png",
-                "cluster_dimred/combat/entropy/combat-SRR28679757-1.0_entropy.png",
-                "cluster_dimred/combat/entropy/combat-SRR28679758-0.5_entropy.png",
-                "cluster_dimred/combat/entropy/combat-SRR28679758-1.0_entropy.png",
-                "cluster_dimred/combat/leiden",
-                "cluster_dimred/combat/leiden/combat-SRR28679756-0.5_leiden.png",
-                "cluster_dimred/combat/leiden/combat-SRR28679756-1.0_leiden.png",
-                "cluster_dimred/combat/leiden/combat-SRR28679757-0.5_leiden.png",
-                "cluster_dimred/combat/leiden/combat-SRR28679757-1.0_leiden.png",
-                "cluster_dimred/combat/leiden/combat-SRR28679758-0.5_leiden.png",
-                "cluster_dimred/combat/leiden/combat-SRR28679758-1.0_leiden.png",
-                "cluster_dimred/combat/umap",
-                "cluster_dimred/harmony",
-                "cluster_dimred/harmony/entropy",
-                "cluster_dimred/harmony/entropy/harmony-SRR28679756-0.5_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-SRR28679756-1.0_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-SRR28679757-0.5_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-SRR28679757-1.0_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-SRR28679758-0.5_entropy.png",
-                "cluster_dimred/harmony/entropy/harmony-SRR28679758-1.0_entropy.png",
-                "cluster_dimred/harmony/leiden",
-                "cluster_dimred/harmony/leiden/harmony-SRR28679756-0.5_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-SRR28679756-1.0_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-SRR28679757-0.5_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-SRR28679757-1.0_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-SRR28679758-0.5_leiden.png",
-                "cluster_dimred/harmony/leiden/harmony-SRR28679758-1.0_leiden.png",
-                "cluster_dimred/harmony/umap",
-                "cluster_dimred/scvi",
-                "cluster_dimred/scvi/entropy",
-                "cluster_dimred/scvi/entropy/scvi-SRR28679756-0.5_entropy.png",
-                "cluster_dimred/scvi/entropy/scvi-SRR28679756-1.0_entropy.png",
-                "cluster_dimred/scvi/entropy/scvi-SRR28679757-0.5_entropy.png",
-                "cluster_dimred/scvi/entropy/scvi-SRR28679757-1.0_entropy.png",
-                "cluster_dimred/scvi/entropy/scvi-SRR28679758-0.5_entropy.png",
-                "cluster_dimred/scvi/entropy/scvi-SRR28679758-1.0_entropy.png",
-                "cluster_dimred/scvi/leiden",
-                "cluster_dimred/scvi/leiden/scvi-SRR28679756-0.5_leiden.png",
-                "cluster_dimred/scvi/leiden/scvi-SRR28679756-1.0_leiden.png",
-                "cluster_dimred/scvi/leiden/scvi-SRR28679757-0.5_leiden.png",
-                "cluster_dimred/scvi/leiden/scvi-SRR28679757-1.0_leiden.png",
-                "cluster_dimred/scvi/leiden/scvi-SRR28679758-0.5_leiden.png",
-                "cluster_dimred/scvi/leiden/scvi-SRR28679758-1.0_leiden.png",
-                "cluster_dimred/scvi/umap",
+                "cluster_dimred/symphony",
+                "cluster_dimred/symphony/entropy",
+                "cluster_dimred/symphony/entropy/symphony-SRR28679756-0.5_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-SRR28679756-1.0_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-SRR28679757-0.5_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-SRR28679757-1.0_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-SRR28679758-0.5_entropy.png",
+                "cluster_dimred/symphony/entropy/symphony-SRR28679758-1.0_entropy.png",
+                "cluster_dimred/symphony/leiden",
+                "cluster_dimred/symphony/leiden/symphony-SRR28679756-0.5_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-SRR28679756-1.0_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-SRR28679757-0.5_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-SRR28679757-1.0_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-SRR28679758-0.5_leiden.png",
+                "cluster_dimred/symphony/leiden/symphony-SRR28679758-1.0_leiden.png",
+                "cluster_dimred/symphony/umap",
                 "finalized",
                 "finalized/base.h5ad",
                 "finalized/base.rds",
@@ -145,7 +111,7 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-11T23:50:17.469316064",
+        "timestamp": "2026-05-29T14:32:12.630005788",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"

From a7429c96c3d029476e80a113d784e7143a796ebd Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Fri, 29 May 2026 23:19:00 +0200
Subject: [PATCH 10/19] Use forked pipeline test data

Point pipeline nf-tests back at the forked fixtures needed by existing tests and align the COMBINE test inputs with the current workflow signature.
---
 nextflow.config                               |  2 +-
 nextflow_schema.json                          |  2 +-
 subworkflows/local/combine/tests/main.nf.test | 10 ++++++----
 tests/main_pipeline_extend.nf.test            |  2 +-
 tests/main_pipeline_reference_mapping.nf.test |  2 +-
 tests/main_pipeline_sub.nf.test               |  2 +-
 tests/nextflow.config                         |  2 +-
 7 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 9e612096..1bfca071 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -113,7 +113,7 @@ params {
     help_full                    = false
     show_hidden                  = false
     version                      = false
-    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085/'
     trace_report_suffix          = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
     // Config options
     config_profile_name           = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 762c1fc3..a180dba5 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -577,7 +577,7 @@
                     "type": "string",
                     "fa_icon": "far fa-check-circle",
                     "description": "Base URL or local path to location of pipeline test dataset files",
-                    "default": "https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/",
+                    "default": "https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085/",
                     "hidden": true
                 },
                 "trace_report_suffix": {
diff --git a/subworkflows/local/combine/tests/main.nf.test b/subworkflows/local/combine/tests/main.nf.test
index 836d14d3..e8446503 100644
--- a/subworkflows/local/combine/tests/main.nf.test
+++ b/subworkflows/local/combine/tests/main.nf.test
@@ -32,8 +32,9 @@ nextflow_workflow {
                 input[9]  = ''
                 input[10] = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz'
                 input[11] = null
-                input[12] = 'condition'
-                input[13] = false
+                input[12] = null
+                input[13] = 'condition'
+                input[14] = false
                 """
             }
         }
@@ -68,8 +69,9 @@ nextflow_workflow {
                 input[9]  = ''
                 input[10] = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz'
                 input[11] = null
-                input[12] = 'condition'
-                input[13] = false
+                input[12] = null
+                input[13] = 'condition'
+                input[14] = false
                 """
             }
         }
diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test
index d549b821..bc311d04 100644
--- a/tests/main_pipeline_extend.nf.test
+++ b/tests/main_pipeline_extend.nf.test
@@ -8,7 +8,7 @@ nextflow_pipeline {
 
         when {
             params {
-                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
+                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
                 input = pipelines_testdata_base_path + 'samplesheet_single.csv'
                 integration_methods = 'scvi,symphony'
                 doublet_detection   = 'scrublet,scdblfinder'
diff --git a/tests/main_pipeline_reference_mapping.nf.test b/tests/main_pipeline_reference_mapping.nf.test
index fdaa3411..e7ccbe44 100644
--- a/tests/main_pipeline_reference_mapping.nf.test
+++ b/tests/main_pipeline_reference_mapping.nf.test
@@ -8,7 +8,7 @@ nextflow_pipeline {
 
         when {
             params {
-                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
+                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
                 input = pipelines_testdata_base_path + 'samplesheet_single.csv'
                 integration_methods = 'scvi,symphony'
                 doublet_detection   = 'scrublet,scdblfinder'
diff --git a/tests/main_pipeline_sub.nf.test b/tests/main_pipeline_sub.nf.test
index 5bfb2b79..03758609 100644
--- a/tests/main_pipeline_sub.nf.test
+++ b/tests/main_pipeline_sub.nf.test
@@ -8,7 +8,7 @@ nextflow_pipeline {
 
         when {
             params {
-                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
+                pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
                 outdir              = "$outputDir"
                 input               = null
                 base_adata          = pipelines_testdata_base_path + 'extension_base/merged.h5ad'
diff --git a/tests/nextflow.config b/tests/nextflow.config
index 71b5b6be..0ab28b9b 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -8,7 +8,7 @@
 // Or any resources requirements
 params {
     modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
-    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/'
+    pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085/'
     // CyteType is slow (remote LLM API); module nf-tests cover it — keep off in pipeline/subworkflow nf-tests
     cytetype_study_context = ''
 }

From 3b3d863c2979af20ef9911ea98eda44a8877aba6 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Sat, 30 May 2026 23:07:50 +0200
Subject: [PATCH 11/19] Fix cellbender merge gene alignment for mixed ID
 columns.

Support both the gene_ids and gene_id column names in the filtered and CellBender matrices, and fall back to the var index when an ID column is missing, fixing AMBIENT_CORRECTION tests on symbol-only h5ad inputs.
---
 modules/nf-core/cellbender/merge/templates/merge.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/modules/nf-core/cellbender/merge/templates/merge.py b/modules/nf-core/cellbender/merge/templates/merge.py
index a15ada0d..8d424df5 100644
--- a/modules/nf-core/cellbender/merge/templates/merge.py
+++ b/modules/nf-core/cellbender/merge/templates/merge.py
@@ -30,11 +30,14 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
 adata_cellbender = load_anndata_from_input_and_output("${unfiltered}", "${cellbender_h5}", analyzed_barcodes_only=False)
 
 # Subset to the barcodes and genes present in the filtered matrix.
-# Gene symbols (var index) may not be unique, so align on Ensembl IDs.
-# The filtered h5ad uses 'gene_ids'; load_anndata_from_input_and_output uses 'gene_id'.
-gene_id_col = "gene_id" if "gene_id" in adata_cellbender.var.columns else adata_cellbender.var.index.name
-cb_id_to_pos = {gid: i for i, gid in enumerate(adata_cellbender.var[gene_id_col])}
-var_positions = [cb_id_to_pos[gid] for gid in adata.var["gene_ids"]]
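+# Gene symbols (var index) may not be unique, so prefer Ensembl IDs; the filtered side falls back to its var index when it has no ID column, and the CellBender side does so when either matrix lacks one.
+# Column names differ: 10x/readh5 uses 'gene_ids'; unify/cellbender uses 'gene_id', so both names are probed on both matrices.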
+filtered_gene_id_col = next((col for col in ("gene_ids", "gene_id") if col in adata.var.columns), None)
+cellbender_gene_id_col = next((col for col in ("gene_id", "gene_ids") if col in adata_cellbender.var.columns), None)
+filtered_ids = adata.var[filtered_gene_id_col] if filtered_gene_id_col else adata.var.index
+cellbender_ids = adata_cellbender.var[cellbender_gene_id_col] if filtered_gene_id_col and cellbender_gene_id_col else adata_cellbender.var.index
+cb_id_to_pos = {gid: i for i, gid in enumerate(cellbender_ids)}
+var_positions = [cb_id_to_pos[gid] for gid in filtered_ids]
 adata_cellbender = adata_cellbender[adata.obs_names, var_positions]
 
 if "${output_layer}" == "X":

From f31f4842a399a373198ffff5d366dc068120a161 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Sun, 31 May 2026 08:51:19 +0200
Subject: [PATCH 12/19] Update SEURAT_INTEGRATION container with glmGamPoi and
 Seurat 5.5.

Add bioconductor-glmgampoi and bump r-seurat so SCTransform uses the supported v2 backend in the Wave image.
---
 modules/local/seurat/integration/environment.yml | 3 ++-
 modules/local/seurat/integration/main.nf         | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/local/seurat/integration/environment.yml b/modules/local/seurat/integration/environment.yml
index f6bd79fc..d362a6f1 100644
--- a/modules/local/seurat/integration/environment.yml
+++ b/modules/local/seurat/integration/environment.yml
@@ -3,5 +3,6 @@ channels:
   - bioconda
 dependencies:
   - bioconda::bioconductor-anndatar=1.0.2
+  - bioconda::bioconductor-glmgampoi=1.22.0
   - bioconda::bioconductor-rhdf5=2.54.1
-  - conda-forge::r-seurat=5.4.0
+  - conda-forge::r-seurat=5.5.0
diff --git a/modules/local/seurat/integration/main.nf b/modules/local/seurat/integration/main.nf
index 3cc2605c..33ff14fc 100644
--- a/modules/local/seurat/integration/main.nf
+++ b/modules/local/seurat/integration/main.nf
@@ -4,8 +4,8 @@ process SEURAT_INTEGRATION {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
-        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4393c608e642b1232cd7bb84e6c5d7620c4b167462f342a4780307e5e67596b/data':
-        'community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-rhdf5_r-seurat:71809468c7d8a963' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7b/7bbad8d18ada67c2ca1dfaec11c5acb0fcd355713fec10331b0e202f1d6165f1/data':
+        'community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-glmgampoi_bioconductor-rhdf5_r-seurat:a0acfd4813d44adc' }"
 
     input:
     tuple val(meta), path(h5ad)

From be87838905b71bbb9e06f452945ab24bad8b0456 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Sun, 31 May 2026 08:51:25 +0200
Subject: [PATCH 13/19] Align SEURAT_INTEGRATION nf-test with pipeline QC gene
 filtering.

Run SCANPY_FILTER on raw counts before HVG selection and refresh integration and combat snapshots.
---
 .../scanpy/combat/tests/main.nf.test.snap     |  4 +--
 .../seurat/integration/tests/main.nf.test     | 27 +++++++++++++++----
 .../integration/tests/main.nf.test.snap       | 10 +++----
 3 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/modules/local/scanpy/combat/tests/main.nf.test.snap b/modules/local/scanpy/combat/tests/main.nf.test.snap
index c3becdf6..67c00aa4 100644
--- a/modules/local/scanpy/combat/tests/main.nf.test.snap
+++ b/modules/local/scanpy/combat/tests/main.nf.test.snap
@@ -56,7 +56,7 @@
             },
             {
                 "n_obs": 38234,
-                "n_vars": 100,
+                "n_vars": 101,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -95,7 +95,7 @@
                 ]
             }
         ],
-        "timestamp": "2026-05-11T12:33:40.280258286",
+        "timestamp": "2026-05-31T07:26:05.351659325",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
diff --git a/modules/local/seurat/integration/tests/main.nf.test b/modules/local/seurat/integration/tests/main.nf.test
index eac29227..8fac539d 100644
--- a/modules/local/seurat/integration/tests/main.nf.test
+++ b/modules/local/seurat/integration/tests/main.nf.test
@@ -8,8 +8,8 @@ nextflow_process {
     tag "modules_local"
 
     setup {
-        run("SCANPY_HVGS") {
-            script "modules/local/scanpy/hvgs/main.nf"
+        run("SCANPY_FILTER", alias: "QC_FILTER") {
+            script "modules/local/scanpy/filter/main.nf"
             process {
                 """
                 input[0] = channel.of([
@@ -17,12 +17,29 @@ nextflow_process {
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
+                input[1] = "index"
+                input[2] = 20
+                input[3] = 20
+                input[4] = 50
+                input[5] = 50
+                input[6] = 100
+                input[7] = 0
+                input[8] = 100
+                input[9] = []
+                """
+            }
+        }
+        run("SCANPY_HVGS") {
+            script "modules/local/scanpy/hvgs/main.nf"
+            process {
+                """
+                input[0] = QC_FILTER.out.h5ad
                 input[1] = 100
                 input[2] = []
                 """
             }
         }
-        run("SCANPY_FILTER") {
+        run("SCANPY_FILTER", alias: "HVG_FILTER") {
             script "modules/local/scanpy/filter/main.nf"
             process {
                 """
@@ -49,7 +66,7 @@ nextflow_process {
             }
             process {
                 """
-                input[0] = SCANPY_FILTER.out.h5ad
+                input[0] = HVG_FILTER.out.h5ad
                 input[1] = 'sample'
                 """
             }
@@ -79,7 +96,7 @@ nextflow_process {
             }
             process {
                 """
-                input[0] = SCANPY_FILTER.out.h5ad
+                input[0] = HVG_FILTER.out.h5ad
                 input[1] = 'sample'
                 """
             }
diff --git a/modules/local/seurat/integration/tests/main.nf.test.snap b/modules/local/seurat/integration/tests/main.nf.test.snap
index 1957b1a4..cb67cdc5 100644
--- a/modules/local/seurat/integration/tests/main.nf.test.snap
+++ b/modules/local/seurat/integration/tests/main.nf.test.snap
@@ -37,12 +37,12 @@
             {
                 "SEURAT_INTEGRATION": {
                     "R": "4.5.3",
-                    "Seurat": "5.4.0",
+                    "Seurat": "5.5.0",
                     "anndataR": "1.0.2"
                 }
             },
             {
-                "n_obs": 27350,
+                "n_obs": 12381,
                 "n_vars": 100,
                 "obs": {
                     "index": "_index",
@@ -99,10 +99,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-04-12T20:15:34.917181",
+        "timestamp": "2026-05-31T08:44:01.475540778",
         "meta": {
-            "nf-test": "0.9.5",
-            "nextflow": "25.10.4"
+            "nf-test": "0.9.4",
+            "nextflow": "26.04.0"
         }
     }
 }
\ No newline at end of file

From 150d30c088f7a4a8a077e0d303a1846746350d98 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Sun, 31 May 2026 10:15:23 +0200
Subject: [PATCH 14/19] Update test snapshots

---
 .../prepcellxgene/tests/main.nf.test.snap     | 210 +++---------------
 .../splitembeddings/tests/main.nf.test.snap   | 203 ++---------------
 .../local/scanpy/hvgs/tests/main.nf.test.snap |   6 +-
 3 files changed, 54 insertions(+), 365 deletions(-)

diff --git a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap
index e2982530..8fa47513 100644
--- a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap
+++ b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap
@@ -40,7 +40,7 @@
                         {
                             "id": "test"
                         },
-                        "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3"
+                        "test.h5ad:md5,1ea9af3fd7a7908e99d6a0ec04f62b89"
                     ]
                 ],
                 "1": [
@@ -51,7 +51,7 @@
                         {
                             "id": "test"
                         },
-                        "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3"
+                        "test.h5ad:md5,1ea9af3fd7a7908e99d6a0ec04f62b89"
                     ]
                 ],
                 "versions": [
@@ -67,193 +67,40 @@
                 }
             },
             {
-                "n_obs": 32135,
+                "n_obs": 23364,
                 "n_vars": 9887,
                 "obs": {
                     "index": "_index",
                     "columns": [
-                        "G2M_score",
-                        "S_score",
                         "batch",
                         "bbknn-global-0.5:entropy",
                         "bbknn-global-0.5_leiden",
                         "bbknn-global-1.0:entropy",
                         "bbknn-global-1.0_leiden",
-                        "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct",
-                        "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct",
-                        "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells",
-                        "celltypist:Adult_COVID19_PBMC",
-                        "celltypist:Adult_COVID19_PBMC:conf",
+                        "celltypist:Adult_Human_Skin",
+                        "celltypist:Adult_Human_Skin:conf",
                         "combat-global-0.5:entropy",
                         "combat-global-0.5_leiden",
                         "combat-global-1.0:entropy",
                         "combat-global-1.0_leiden",
                         "condition",
-                        "hpca_celldex.tar.delta.next_hpca_celldex",
-                        "hpca_celldex.tar.labels_hpca_celldex",
-                        "hpca_celldex.tar.pruned.labels_hpca_celldex",
-                        "hpca_celldex.tar.scores_hpca_celldex.Astrocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.BM",
-                        "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.",
-                        "hpca_celldex.tar.scores_hpca_celldex.B_cell",
-                        "hpca_celldex.tar.scores_hpca_celldex.CMP",
-                        "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.DC",
-                        "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Erythroblast",
-                        "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts",
-                        "hpca_celldex.tar.scores_hpca_celldex.GMP",
-                        "hpca_celldex.tar.scores_hpca_celldex.Gametocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF",
-                        "hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.",
-                        "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.MEP",
-                        "hpca_celldex.tar.scores_hpca_celldex.MSC",
-                        "hpca_celldex.tar.scores_hpca_celldex.Macrophage",
-                        "hpca_celldex.tar.scores_hpca_celldex.Monocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.Myelocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.NK_cell",
-                        "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell",
-                        "hpca_celldex.tar.scores_hpca_celldex.Neurons",
-                        "hpca_celldex.tar.scores_hpca_celldex.Neutrophils",
-                        "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts",
-                        "hpca_celldex.tar.scores_hpca_celldex.Platelets",
-                        "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.",
-                        "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.",
-                        "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.T_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.iPS_cells",
-                        "immune_celldex.tar.delta.next_immune_celldex",
-                        "immune_celldex.tar.labels_immune_celldex",
-                        "immune_celldex.tar.pruned.labels_immune_celldex",
-                        "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Classical.monocytes",
-                        "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes",
-                        "immune_celldex.tar.scores_immune_celldex.Low.density.basophils",
-                        "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils",
-                        "immune_celldex.tar.scores_immune_celldex.MAIT.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Naive.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes",
-                        "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Plasmablasts",
-                        "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Progenitor.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th1.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th17.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th2.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells",
+                        "harmony-global-0.5:entropy",
+                        "harmony-global-0.5_leiden",
+                        "harmony-global-1.0:entropy",
+                        "harmony-global-1.0_leiden",
                         "label",
                         "n_counts",
                         "n_genes",
                         "n_genes_by_counts",
-                        "pct_counts_hb",
                         "pct_counts_mt",
-                        "pct_counts_ribo",
-                        "phase",
                         "sample",
                         "sample_original",
                         "scvi-global-0.5:entropy",
                         "scvi-global-0.5_leiden",
                         "scvi-global-1.0:entropy",
                         "scvi-global-1.0_leiden",
-                        "seurat-global-0.5:entropy",
-                        "seurat-global-0.5_leiden",
-                        "seurat-global-1.0:entropy",
-                        "seurat-global-1.0_leiden",
-                        "symphony-global-0.5:entropy",
-                        "symphony-global-0.5_leiden",
-                        "symphony-global-1.0:entropy",
-                        "symphony-global-1.0_leiden",
                         "total_counts",
-                        "total_counts_hb",
-                        "total_counts_mt",
-                        "total_counts_ribo"
+                        "total_counts_mt"
                     ]
                 },
                 "var": {
@@ -268,12 +115,11 @@
                 "obsm": [
                     "X_bbknn-global_umap",
                     "X_combat-global_umap",
+                    "X_harmony",
+                    "X_harmony-global_umap",
                     "X_scvi-global_umap",
-                    "X_seurat-global_umap",
-                    "X_symphony-global_umap",
                     "combat",
-                    "scvi",
-                    "symphony"
+                    "scvi"
                 ],
                 "varm": [
                     
@@ -285,31 +131,29 @@
                     
                 ],
                 "uns": [
-                    "bbknn-global-0.5_leiden_characteristic_genes",
+                    "bbknn-global-0.5_characteristic_genes",
                     "bbknn-global-0.5_paga",
-                    "bbknn-global-1.0_leiden_characteristic_genes",
+                    "bbknn-global-1.0_characteristic_genes",
                     "bbknn-global-1.0_paga",
-                    "combat-global-0.5_leiden_characteristic_genes",
+                    "combat-global-0.5_characteristic_genes",
+                    "combat-global-0.5_liana",
                     "combat-global-0.5_paga",
-                    "combat-global-1.0_leiden_characteristic_genes",
+                    "combat-global-1.0_characteristic_genes",
+                    "combat-global-1.0_liana",
                     "combat-global-1.0_paga",
+                    "harmony-global-0.5_characteristic_genes",
+                    "harmony-global-0.5_paga",
+                    "harmony-global-1.0_characteristic_genes",
+                    "harmony-global-1.0_paga",
                     "log1p",
-                    "scvi-global-0.5_leiden_characteristic_genes",
+                    "scvi-global-0.5_characteristic_genes",
                     "scvi-global-0.5_paga",
-                    "scvi-global-1.0_leiden_characteristic_genes",
-                    "scvi-global-1.0_paga",
-                    "seurat-global-0.5_leiden_characteristic_genes",
-                    "seurat-global-0.5_paga",
-                    "seurat-global-1.0_leiden_characteristic_genes",
-                    "seurat-global-1.0_paga",
-                    "symphony-global-0.5_leiden_characteristic_genes",
-                    "symphony-global-0.5_paga",
-                    "symphony-global-1.0_leiden_characteristic_genes",
-                    "symphony-global-1.0_paga"
+                    "scvi-global-1.0_characteristic_genes",
+                    "scvi-global-1.0_paga"
                 ]
             }
         ],
-        "timestamp": "2026-05-29T11:36:58.839746387",
+        "timestamp": "2026-05-31T10:12:36.828571877",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
diff --git a/modules/local/adata/splitembeddings/tests/main.nf.test.snap b/modules/local/adata/splitembeddings/tests/main.nf.test.snap
index 1da9eb04..7d87289d 100644
--- a/modules/local/adata/splitembeddings/tests/main.nf.test.snap
+++ b/modules/local/adata/splitembeddings/tests/main.nf.test.snap
@@ -40,7 +40,7 @@
                         {
                             "id": "test"
                         },
-                        "scvi.h5ad:md5,70a63fb030713420c635aab523701691"
+                        "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e"
                     ]
                 ],
                 "1": [
@@ -51,7 +51,7 @@
                         {
                             "id": "test"
                         },
-                        "scvi.h5ad:md5,70a63fb030713420c635aab523701691"
+                        "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e"
                     ]
                 ],
                 "versions": [
@@ -65,193 +65,40 @@
                 }
             },
             {
-                "n_obs": 32135,
+                "n_obs": 23364,
                 "n_vars": 9887,
                 "obs": {
                     "index": "_index",
                     "columns": [
-                        "G2M_score",
-                        "S_score",
                         "batch",
                         "bbknn-global-0.5:entropy",
                         "bbknn-global-0.5_leiden",
                         "bbknn-global-1.0:entropy",
                         "bbknn-global-1.0_leiden",
-                        "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct",
-                        "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct",
-                        "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells",
-                        "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells",
-                        "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells",
-                        "celltypist:Adult_COVID19_PBMC",
-                        "celltypist:Adult_COVID19_PBMC:conf",
+                        "celltypist:Adult_Human_Skin",
+                        "celltypist:Adult_Human_Skin:conf",
                         "combat-global-0.5:entropy",
                         "combat-global-0.5_leiden",
                         "combat-global-1.0:entropy",
                         "combat-global-1.0_leiden",
                         "condition",
-                        "hpca_celldex.tar.delta.next_hpca_celldex",
-                        "hpca_celldex.tar.labels_hpca_celldex",
-                        "hpca_celldex.tar.pruned.labels_hpca_celldex",
-                        "hpca_celldex.tar.scores_hpca_celldex.Astrocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.BM",
-                        "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.",
-                        "hpca_celldex.tar.scores_hpca_celldex.B_cell",
-                        "hpca_celldex.tar.scores_hpca_celldex.CMP",
-                        "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.DC",
-                        "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Erythroblast",
-                        "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts",
-                        "hpca_celldex.tar.scores_hpca_celldex.GMP",
-                        "hpca_celldex.tar.scores_hpca_celldex.Gametocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF",
-                        "hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.",
-                        "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes",
-                        "hpca_celldex.tar.scores_hpca_celldex.MEP",
-                        "hpca_celldex.tar.scores_hpca_celldex.MSC",
-                        "hpca_celldex.tar.scores_hpca_celldex.Macrophage",
-                        "hpca_celldex.tar.scores_hpca_celldex.Monocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.Myelocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.NK_cell",
-                        "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell",
-                        "hpca_celldex.tar.scores_hpca_celldex.Neurons",
-                        "hpca_celldex.tar.scores_hpca_celldex.Neutrophils",
-                        "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts",
-                        "hpca_celldex.tar.scores_hpca_celldex.Platelets",
-                        "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.",
-                        "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.",
-                        "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte",
-                        "hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.T_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells",
-                        "hpca_celldex.tar.scores_hpca_celldex.iPS_cells",
-                        "immune_celldex.tar.delta.next_immune_celldex",
-                        "immune_celldex.tar.labels_immune_celldex",
-                        "immune_celldex.tar.pruned.labels_immune_celldex",
-                        "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Classical.monocytes",
-                        "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes",
-                        "immune_celldex.tar.scores_immune_celldex.Low.density.basophils",
-                        "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils",
-                        "immune_celldex.tar.scores_immune_celldex.MAIT.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Naive.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes",
-                        "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Plasmablasts",
-                        "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Progenitor.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells",
-                        "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th1.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th17.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Th2.cells",
-                        "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells",
+                        "harmony-global-0.5:entropy",
+                        "harmony-global-0.5_leiden",
+                        "harmony-global-1.0:entropy",
+                        "harmony-global-1.0_leiden",
                         "label",
                         "n_counts",
                         "n_genes",
                         "n_genes_by_counts",
-                        "pct_counts_hb",
                         "pct_counts_mt",
-                        "pct_counts_ribo",
-                        "phase",
                         "sample",
                         "sample_original",
                         "scvi-global-0.5:entropy",
                         "scvi-global-0.5_leiden",
                         "scvi-global-1.0:entropy",
                         "scvi-global-1.0_leiden",
-                        "seurat-global-0.5:entropy",
-                        "seurat-global-0.5_leiden",
-                        "seurat-global-1.0:entropy",
-                        "seurat-global-1.0_leiden",
-                        "symphony-global-0.5:entropy",
-                        "symphony-global-0.5_leiden",
-                        "symphony-global-1.0:entropy",
-                        "symphony-global-1.0_leiden",
                         "total_counts",
-                        "total_counts_hb",
-                        "total_counts_mt",
-                        "total_counts_ribo"
+                        "total_counts_mt"
                     ]
                 },
                 "var": {
@@ -276,30 +123,28 @@
                     
                 ],
                 "uns": [
-                    "bbknn-global-0.5_leiden_characteristic_genes",
+                    "bbknn-global-0.5_characteristic_genes",
                     "bbknn-global-0.5_paga",
-                    "bbknn-global-1.0_leiden_characteristic_genes",
+                    "bbknn-global-1.0_characteristic_genes",
                     "bbknn-global-1.0_paga",
-                    "combat-global-0.5_leiden_characteristic_genes",
+                    "combat-global-0.5_characteristic_genes",
+                    "combat-global-0.5_liana",
                     "combat-global-0.5_paga",
-                    "combat-global-1.0_leiden_characteristic_genes",
+                    "combat-global-1.0_characteristic_genes",
+                    "combat-global-1.0_liana",
                     "combat-global-1.0_paga",
-                    "scvi-global-0.5_leiden_characteristic_genes",
+                    "harmony-global-0.5_characteristic_genes",
+                    "harmony-global-0.5_paga",
+                    "harmony-global-1.0_characteristic_genes",
+                    "harmony-global-1.0_paga",
+                    "scvi-global-0.5_characteristic_genes",
                     "scvi-global-0.5_paga",
-                    "scvi-global-1.0_leiden_characteristic_genes",
-                    "scvi-global-1.0_paga",
-                    "seurat-global-0.5_leiden_characteristic_genes",
-                    "seurat-global-0.5_paga",
-                    "seurat-global-1.0_leiden_characteristic_genes",
-                    "seurat-global-1.0_paga",
-                    "symphony-global-0.5_leiden_characteristic_genes",
-                    "symphony-global-0.5_paga",
-                    "symphony-global-1.0_leiden_characteristic_genes",
-                    "symphony-global-1.0_paga"
+                    "scvi-global-1.0_characteristic_genes",
+                    "scvi-global-1.0_paga"
                 ]
             }
         ],
-        "timestamp": "2026-05-29T11:36:58.752520992",
+        "timestamp": "2026-05-31T10:09:07.090064015",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"
diff --git a/modules/local/scanpy/hvgs/tests/main.nf.test.snap b/modules/local/scanpy/hvgs/tests/main.nf.test.snap
index e8a736f2..e6dfc237 100644
--- a/modules/local/scanpy/hvgs/tests/main.nf.test.snap
+++ b/modules/local/scanpy/hvgs/tests/main.nf.test.snap
@@ -154,7 +154,7 @@
             },
             {
                 "n_obs": 38234,
-                "n_vars": 100,
+                "n_vars": 101,
                 "obs": {
                     "index": "_index",
                     "columns": [
@@ -191,10 +191,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-03-29T11:19:17.695541068",
+        "timestamp": "2026-05-31T10:13:07.459491579",
         "meta": {
             "nf-test": "0.9.4",
-            "nextflow": "25.10.2"
+            "nextflow": "26.04.0"
         }
     }
 }
\ No newline at end of file

From b7488c78e0c8754deeba8303f269ada2c1aea9b2 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Sun, 31 May 2026 13:07:07 +0200
Subject: [PATCH 15/19] Update pipeline-level test snapshot

---
 tests/main_pipeline_build.nf.test.snap | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/main_pipeline_build.nf.test.snap b/tests/main_pipeline_build.nf.test.snap
index 77005b00..f7cb9baa 100644
--- a/tests/main_pipeline_build.nf.test.snap
+++ b/tests/main_pipeline_build.nf.test.snap
@@ -165,7 +165,7 @@
                 },
                 "SEURAT_INTEGRATION": {
                     "R": "4.5.3",
-                    "Seurat": "5.4.0",
+                    "Seurat": "5.5.0",
                     "anndataR": "1.0.2"
                 },
                 "SYMPHONY_HARMONYINTEGRATE": {
@@ -318,7 +318,7 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-28T17:30:10.040067606",
+        "timestamp": "2026-05-31T12:35:21.295737307",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"

From 3f1d4e76337c5523c5dacdf7fca233186630f46c Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Sun, 31 May 2026 18:59:35 +0200
Subject: [PATCH 16/19] Improve mygene.info API failure messages in
 ADATA_MYGENE.

Surface actionable context when mygene.info returns server or network errors instead of a raw httpx traceback.
---
 .../local/adata/mygene/templates/mygene.py    | 31 +++++++++++++++++--
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/modules/local/adata/mygene/templates/mygene.py b/modules/local/adata/mygene/templates/mygene.py
index e9050d91..ac7cbed9 100644
--- a/modules/local/adata/mygene/templates/mygene.py
+++ b/modules/local/adata/mygene/templates/mygene.py
@@ -6,6 +6,7 @@
 os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
 
 import anndata as ad
+import httpx
 import mygene
 import yaml
 
@@ -21,9 +22,33 @@
 )
 
 mg = mygene.MyGeneInfo()
-df_genes = mg.querymany(inputs,
-    scopes=["symbol", "entrezgene", "ensemblgene"],
-    fields="symbol", species="human", as_dataframe=True)
+try:
+    df_genes = mg.querymany(
+        inputs,
+        scopes=["symbol", "entrezgene", "ensemblgene"],
+        fields="symbol",
+        species="human",
+        as_dataframe=True,
+    )
+except httpx.HTTPStatusError as exc:
+    status = exc.response.status_code
+    if status >= 500:
+        raise RuntimeError(
+            f"mygene.info returned HTTP {status} (server error) while mapping "
+            f"{len(inputs)} gene identifiers from var[{input_col!r}]. "
+            "The mygene.info API is temporarily unavailable or overloaded — "
+            "this is not caused by your input data. Re-run this process; "
+            "if it keeps failing, check https://mygene.info or try again later."
+        ) from exc
+    raise RuntimeError(
+        f"mygene.info returned HTTP {status} while mapping "
+        f"{len(inputs)} gene identifiers from var[{input_col!r}]."
+    ) from exc
+except httpx.RequestError as exc:
+    raise RuntimeError(
+        f"Could not reach mygene.info while mapping {len(inputs)} gene identifiers "
+        f"from var[{input_col!r}]: {exc}. Check network connectivity and try again."
+    ) from exc
 mapping = df_genes["symbol"].dropna().to_dict()
 
 outputs = [mapping.get(i, i) for i in inputs]

From e8e002e1ff9e1b7262405a7c23f421254a1e2807 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Mon, 1 Jun 2026 21:22:56 +0200
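Subject: [PATCH 17/19] Limit number of genes processed at a time in mygene

Query mygene.info in batches of 500 identifiers and merge the per-batch
symbol mappings instead of sending all identifiers in a single querymany
call. This also drops the httpx-specific error wrapping introduced in the
previous commit.
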
---
 .../local/adata/mygene/templates/mygene.py    | 27 +++----------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/modules/local/adata/mygene/templates/mygene.py b/modules/local/adata/mygene/templates/mygene.py
index ac7cbed9..e5d14385 100644
--- a/modules/local/adata/mygene/templates/mygene.py
+++ b/modules/local/adata/mygene/templates/mygene.py
@@ -6,7 +6,6 @@
 os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
 
 import anndata as ad
-import httpx
 import mygene
 import yaml
 
@@ -22,34 +21,16 @@
 )
 
 mg = mygene.MyGeneInfo()
-try:
+mapping = {}
+for i in range(0, len(inputs), 500):
     df_genes = mg.querymany(
-        inputs,
+        inputs[i : i + 500],
         scopes=["symbol", "entrezgene", "ensemblgene"],
         fields="symbol",
         species="human",
         as_dataframe=True,
     )
-except httpx.HTTPStatusError as exc:
-    status = exc.response.status_code
-    if status >= 500:
-        raise RuntimeError(
-            f"mygene.info returned HTTP {status} (server error) while mapping "
-            f"{len(inputs)} gene identifiers from var[{input_col!r}]. "
-            "The mygene.info API is temporarily unavailable or overloaded — "
-            "this is not caused by your input data. Re-run this process; "
-            "if it keeps failing, check https://mygene.info or try again later."
-        ) from exc
-    raise RuntimeError(
-        f"mygene.info returned HTTP {status} while mapping "
-        f"{len(inputs)} gene identifiers from var[{input_col!r}]."
-    ) from exc
-except httpx.RequestError as exc:
-    raise RuntimeError(
-        f"Could not reach mygene.info while mapping {len(inputs)} gene identifiers "
-        f"from var[{input_col!r}]: {exc}. Check network connectivity and try again."
-    ) from exc
-mapping = df_genes["symbol"].dropna().to_dict()
+    mapping.update(df_genes["symbol"].dropna().to_dict())
 
 outputs = [mapping.get(i, i) for i in inputs]
 

From 8631fd441eb1c2ecae9af88003e657aa29ba0776 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Mon, 1 Jun 2026 21:44:20 +0200
Subject: [PATCH 18/19] Thread params.species through ADATA_MYGENE for
 MyGene.info lookups.

Pass species from the pipeline into unify and quality control so gene ID conversion respects the configured organism, and update tests and parameter docs accordingly.
---
 docs/usage.md                                 | 47 ++++++++++---------
 main.nf                                       |  3 ++
 modules/local/adata/mygene/main.nf            |  1 +
 .../local/adata/mygene/templates/mygene.py    |  2 +-
 modules/local/adata/mygene/tests/main.nf.test |  2 +
 nextflow_schema.json                          |  2 +-
 subworkflows/local/quality_control/main.nf    |  4 +-
 .../local/quality_control/tests/main.nf.test  |  5 ++
 subworkflows/local/unify/main.nf              |  4 +-
 subworkflows/local/unify/tests/main.nf.test   |  6 +++
 workflows/scdownstream.nf                     |  2 +
 11 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/docs/usage.md b/docs/usage.md
index a7dcc04a..8447d758 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -51,29 +51,29 @@ sample3,/absolute/path/to/sample3_filtered.csv,/absolute/path/to/sample3.csv,,,,
 
 For CSV input files, specifying the `batch_col`, `label_col`, `condition_col`, and `unknown_label` columns will not have any effect, as no additional metadata is available in the CSV file.
 
-| Column                          | Description                                                                                                                                                                                                                                                                                                                                                                                                         |
-| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample`                        | Unique sample identifier. Will be added to the pipeline output objects as `sample` column.                                                                                                                                                                                                                                                                                                                          |
-| `filtered`                      | May contain paths to `h5ad`, `h5`, `rds`, or `csv` files. `rds` files may contain any object that can be converted to a `SingleCellExperiment` using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function. `csv` files should contain a matrix with genes as columns and cells as rows.                                                                  |
-| `unfiltered`                    | Same as `filtered`, but for the unfiltered cellranger or nf-core/scrnaseq output. If not provided, only `decontX` can be used for ambient RNA removal.                                                                                                                                                                                                                                                              |
-| `batch_col`                     | Column in the input file containing batch information. If not provided, the entire input object will be considered as one batch. If the `batch_col` is something else than `batch`, it will be renamed to `batch` during pipeline execution.                                                                                                                                                                        |
-| `symbol_col`                    | Column in the input file containing gene symbol information. Defaults to `index`. There are two special values that can be used: `index` and `none`. `index` will use the row names of the matrix as gene symbols. `none` will trigger the pipeline to perform gene symbol conversion using MyGene.info based on the `geneid_col`. The values from `symbol_col` will be set as the index during pipeline execution. |
-| `geneid_col`                    | Column in the input file containing gene identifier information. Defaults to `index`. Only used if `symbol_col` is set to `none`.                                                                                                                                                                                                                                                                                   |
-| `label_col`                     | Column in the input file containing cell type information. Defaults to `label`. If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. If the `label_col` is something else than `label`, it will be renamed to `label` during pipeline execution.                                                                                                        |
-| `condition_col`                 | Column in the input file containing condition information (e.g. disease state, treatment). If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. If the `condition_col` is something else than `condition`, it will be renamed to `condition` during pipeline execution.                                                                                 |
-| `unknown_label`                 | Value in the `label_col` column that should be considered as unknown. Defaults to `unknown`. If the `unknown_label` is something else than `unknown`, it will be renamed to `unknown` during pipeline execution. If trying to perform integration with scANVI, more than one unique label other than `unknown` must exist in the input data.                                                                        |
-| `counts_layer`                  | Layer in the input file containing the raw counts matrix. Defaults to `X`.                                                                                                                                                                                                                                                                                                                                          |
-| `min_genes`                     | Minimum number of genes required for a cell to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                      |
-| `min_cells`                     | Minimum number of cells required for a gene to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                      |
-| `min_counts_cell`               | Minimum number of counts required for a cell to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                     |
-| `min_counts_gene`               | Minimum number of counts required for a gene to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                     |
-| `expected_cells`                | Number of expected cells, used as input to CellBender for empty droplet detection.                                                                                                                                                                                                                                                                                                                                  |
-| `doublet_rate`                  | Optional expected doublet rate (0-1) for `scDblFinder`. If not provided, `scDblFinder` estimates it internally.                                                                                                                                                                                                                                                                                                     |
-| `max_mito_percentage`           | Maximum percentage of mitochondrial reads for a cell to be considered. Defaults to `100`.                                                                                                                                                                                                                                                                                                                           |
-| `min_ribo_percentage`           | Minimum percentage of ribosomal reads for a cell to be considered. Defaults to `0`.                                                                                                                                                                                                                                                                                                                                 |
-| `max_hb_percentage`             | Maximum percentage of haemoglobin reads for a cell to be considered. Defaults to `100`.                                                                                                                                                                                                                                                                                                                             |
-| `ambient_correction`            | Whether to perform ambient RNA correction for this sample. Set to `true` to use the globally configured method, `false` to skip ambient correction for this sample. Defaults to `true`.                                                                                                                                                                                                                             |
-| `ambient_corrected_integration` | Whether to use ambient-corrected counts for integration for this sample. Set to `true` to use corrected counts in downstream integration, `false` to store them only as additional layers. Can override the global `--ambient_corrected_integration` parameter. Defaults to global setting.                                                                                                                         |
+| Column                          | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                |
+| ------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample`                        | Unique sample identifier. Will be added to the pipeline output objects as `sample` column.                                                                                                                                                                                                                                                                                                                                                                 |
+| `filtered`                      | May contain paths to `h5ad`, `h5`, `rds`, or `csv` files. `rds` files may contain any object that can be converted to a `SingleCellExperiment` using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function. `csv` files should contain a matrix with genes as columns and cells as rows.                                                                                                         |
+| `unfiltered`                    | Same as `filtered`, but for the unfiltered cellranger or nf-core/scrnaseq output. If not provided, only `decontX` can be used for ambient RNA removal.                                                                                                                                                                                                                                                                                                     |
+| `batch_col`                     | Column in the input file containing batch information. If not provided, the entire input object will be considered as one batch. If the `batch_col` is something else than `batch`, it will be renamed to `batch` during pipeline execution.                                                                                                                                                                                                               |
+| `symbol_col`                    | Column in the input file containing gene symbol information. Defaults to `index`. There are two special values that can be used: `index` and `none`. `index` will use the row names of the matrix as gene symbols. `none` will trigger the pipeline to perform gene symbol conversion using MyGene.info based on the `geneid_col` and the pipeline `--species` parameter. The values from `symbol_col` will be set as the index during pipeline execution. |
+| `geneid_col`                    | Column in the input file containing gene identifier information. Defaults to `index`. Only used if `symbol_col` is set to `none`.                                                                                                                                                                                                                                                                                                                          |
+| `label_col`                     | Column in the input file containing cell type information. Defaults to `label`. If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. If the `label_col` is something else than `label`, it will be renamed to `label` during pipeline execution.                                                                                                                                               |
+| `condition_col`                 | Column in the input file containing condition information (e.g. disease state, treatment). If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. If the `condition_col` is something else than `condition`, it will be renamed to `condition` during pipeline execution.                                                                                                                        |
+| `unknown_label`                 | Value in the `label_col` column that should be considered as unknown. Defaults to `unknown`. If the `unknown_label` is anything other than `unknown`, it will be renamed to `unknown` during pipeline execution. If trying to perform integration with scANVI, more than one unique label other than `unknown` must exist in the input data.                                                                                                               |
+| `counts_layer`                  | Layer in the input file containing the raw counts matrix. Defaults to `X`.                                                                                                                                                                                                                                                                                                                                                                                 |
+| `min_genes`                     | Minimum number of genes required for a cell to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                                                             |
+| `min_cells`                     | Minimum number of cells required for a gene to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                                                             |
+| `min_counts_cell`               | Minimum number of counts required for a cell to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                                                            |
+| `min_counts_gene`               | Minimum number of counts required for a gene to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                                                            |
+| `expected_cells`                | Number of expected cells, used as input to CellBender for empty droplet detection.                                                                                                                                                                                                                                                                                                                                                                         |
+| `doublet_rate`                  | Optional expected doublet rate (0-1) for `scDblFinder`. If not provided, `scDblFinder` estimates it internally.                                                                                                                                                                                                                                                                                                                                            |
+| `max_mito_percentage`           | Maximum percentage of mitochondrial reads for a cell to be considered. Defaults to `100`.                                                                                                                                                                                                                                                                                                                                                                  |
+| `min_ribo_percentage`           | Minimum percentage of ribosomal reads for a cell to be considered. Defaults to `0`.                                                                                                                                                                                                                                                                                                                                                                        |
+| `max_hb_percentage`             | Maximum percentage of haemoglobin reads for a cell to be considered. Defaults to `100`.                                                                                                                                                                                                                                                                                                                                                                    |
+| `ambient_correction`            | Whether to perform ambient RNA correction for this sample. Set to `true` to use the globally configured method, `false` to skip ambient correction for this sample. Defaults to `true`.                                                                                                                                                                                                                                                                    |
+| `ambient_corrected_integration` | Whether to use ambient-corrected counts for integration for this sample. Set to `true` to use corrected counts in downstream integration, `false` to store them only as additional layers. Can override the global `--ambient_corrected_integration` parameter. Defaults to global setting.                                                                                                                                                                |
 
 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
 
@@ -185,6 +185,7 @@ nextflow run nf-core/scdownstream --input samplesheet.csv --outdir results --cel
 #### Species
 
 Bundled gene lists are provided for human and mouse.
+`--species` also selects the MyGene.info taxonomy used when samples have `symbol_col: none` and gene identifiers are converted via MyGene.info.
 Select the appropriate species with `--species`:
 
 ```bash
diff --git a/main.nf b/main.nf
index 4f032b32..63a26ad8 100644
--- a/main.nf
+++ b/main.nf
@@ -46,6 +46,7 @@ workflow NFCORE_SCDOWNSTREAM {
     cell_cycle_scoring            //   value: boolean
     s_genes                       //    path: file or []
     g2m_genes                     //    path: file or []
+    species                       //   value: string
     qc_only                       //   value: boolean
     celldex_reference             //   value: string
     celltypist_model              //   value: string
@@ -103,6 +104,7 @@ workflow NFCORE_SCDOWNSTREAM {
         cell_cycle_scoring,
         s_genes,
         g2m_genes,
+        species,
         qc_only,
         celldex_reference,
         celltypist_model,
@@ -202,6 +204,7 @@ workflow {
         params.cell_cycle_scoring,
         s_genes_file,
         g2m_genes_file,
+        params.species,
         params.qc_only,
         params.celldex_reference,
         params.celltypist_model,
diff --git a/modules/local/adata/mygene/main.nf b/modules/local/adata/mygene/main.nf
index d80629a4..6ab5b022 100644
--- a/modules/local/adata/mygene/main.nf
+++ b/modules/local/adata/mygene/main.nf
@@ -9,6 +9,7 @@ process ADATA_MYGENE {
 
     input:
     tuple val(meta), path(h5ad)
+    val(species)
 
     output:
     tuple val(meta), path("*.h5ad"), emit: h5ad
diff --git a/modules/local/adata/mygene/templates/mygene.py b/modules/local/adata/mygene/templates/mygene.py
index e5d14385..56173426 100644
--- a/modules/local/adata/mygene/templates/mygene.py
+++ b/modules/local/adata/mygene/templates/mygene.py
@@ -27,7 +27,7 @@
         inputs[i : i + 500],
         scopes=["symbol", "entrezgene", "ensemblgene"],
         fields="symbol",
-        species="human",
+        species="${species}",
         as_dataframe=True,
     )
     mapping.update(df_genes["symbol"].dropna().to_dict())
diff --git a/modules/local/adata/mygene/tests/main.nf.test b/modules/local/adata/mygene/tests/main.nf.test
index 75cf7191..97ec0dc4 100644
--- a/modules/local/adata/mygene/tests/main.nf.test
+++ b/modules/local/adata/mygene/tests/main.nf.test
@@ -20,6 +20,7 @@ nextflow_process {
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
+                input[1] = 'human'
                 """
             }
         }
@@ -51,6 +52,7 @@ nextflow_process {
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
+                input[1] = 'human'
                 """
             }
         }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a180dba5..43ed239b 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -86,7 +86,7 @@
                 "species": {
                     "type": "string",
                     "default": "human",
-                    "description": "Species of the input data. Used to auto-select bundled cell cycle gene lists (assets/cell_cycle_genes/<species>_s_genes.txt and _g2m_genes.txt). Bundled lists are provided for 'human' and 'mouse'. Ignored when --s_genes and --g2m_genes are set explicitly."
+                    "description": "Species of the input data. Used to auto-select bundled cell cycle gene lists (assets/cell_cycle_genes/<species>_s_genes.txt and _g2m_genes.txt) and as the MyGene.info taxonomy when converting gene identifiers (samplesheet symbol_col: none). Bundled cell cycle lists are provided for 'human' and 'mouse' and are ignored when --s_genes and --g2m_genes are set explicitly; the MyGene.info taxonomy always follows --species."
                 },
                 "cell_cycle_scoring": {
                     "type": "boolean",
diff --git a/subworkflows/local/quality_control/main.nf b/subworkflows/local/quality_control/main.nf
index 0aced122..07d07524 100644
--- a/subworkflows/local/quality_control/main.nf
+++ b/subworkflows/local/quality_control/main.nf
@@ -31,6 +31,7 @@ workflow QUALITY_CONTROL {
     cell_cycle_scoring            //   value: boolean
     s_genes                       //    path: file or []
     g2m_genes                     //    path: file or []
+    species                       //   value: string
 
     main:
     ch_multiqc_files = channel.empty()
@@ -117,7 +118,8 @@ workflow QUALITY_CONTROL {
         ch_h5ad,
         unify_gene_symbols,
         duplicate_var_resolution,
-        aggregate_isoforms
+        aggregate_isoforms,
+        species
     )
     ch_multiqc_files = ch_multiqc_files.mix(UNIFY.out.multiqc_files)
     ch_h5ad = UNIFY.out.h5ad
diff --git a/subworkflows/local/quality_control/tests/main.nf.test b/subworkflows/local/quality_control/tests/main.nf.test
index d582d91a..914abd60 100644
--- a/subworkflows/local/quality_control/tests/main.nf.test
+++ b/subworkflows/local/quality_control/tests/main.nf.test
@@ -35,6 +35,7 @@ nextflow_workflow {
                 input[12] = false
                 input[13] = []
                 input[14] = []
+                input[15] = 'human'
                 """
             }
         }
@@ -79,6 +80,7 @@ nextflow_workflow {
                 input[12] = false
                 input[13] = []
                 input[14] = []
+                input[15] = 'human'
                 """
             }
         }
@@ -122,6 +124,7 @@ nextflow_workflow {
                 input[12] = false
                 input[13] = []
                 input[14] = []
+                input[15] = 'human'
                 """
             }
         }
@@ -178,6 +181,7 @@ nextflow_workflow {
                 input[12] = false
                 input[13] = []
                 input[14] = []
+                input[15] = 'human'
                 """
             }
         }
@@ -217,6 +221,7 @@ nextflow_workflow {
                 input[12] = true
                 input[13] = file("${projectDir}/assets/cell_cycle_genes/human_s_genes.txt")
                 input[14] = file("${projectDir}/assets/cell_cycle_genes/human_g2m_genes.txt")
+                input[15] = 'human'
                 """
             }
         }
diff --git a/subworkflows/local/unify/main.nf b/subworkflows/local/unify/main.nf
index 631c79a4..250a6b95 100644
--- a/subworkflows/local/unify/main.nf
+++ b/subworkflows/local/unify/main.nf
@@ -11,6 +11,7 @@ workflow UNIFY {
     unify_gene_symbols       //   value: boolean
     duplicate_var_resolution //   value: string
     aggregate_isoforms       //   value: boolean
+    species                  //   value: string
 
     main:
     ch_multiqc_files = channel.empty()
@@ -21,7 +22,8 @@ workflow UNIFY {
     }
 
     MYGENE (
-        ch_h5ad.needs_symbol_conversion
+        ch_h5ad.needs_symbol_conversion,
+        species
     )
     ch_h5ad = ch_h5ad.has_symbol_col.mix(
         MYGENE.out.h5ad.map { meta, h5ad -> [meta + [symbol_col: 'symbols'], h5ad] }
diff --git a/subworkflows/local/unify/tests/main.nf.test b/subworkflows/local/unify/tests/main.nf.test
index a5c5cf08..35a9eb14 100644
--- a/subworkflows/local/unify/tests/main.nf.test
+++ b/subworkflows/local/unify/tests/main.nf.test
@@ -25,6 +25,7 @@ nextflow_workflow {
                 input[1] = false
                 input[2] = 'sum'
                 input[3] = false
+                input[4] = 'human'
                 """
             }
         }
@@ -52,6 +53,7 @@ nextflow_workflow {
                 input[1] = false
                 input[2] = 'sum'
                 input[3] = false
+                input[4] = 'human'
                 """
             }
         }
@@ -93,6 +95,7 @@ nextflow_workflow {
                 input[1] = false
                 input[2] = 'sum'
                 input[3] = false
+                input[4] = 'human'
                 """
             }
         }
@@ -120,6 +123,7 @@ nextflow_workflow {
                 input[1] = false
                 input[2] = 'sum'
                 input[3] = false
+                input[4] = 'human'
                 """
             }
         }
@@ -161,6 +165,7 @@ nextflow_workflow {
                 input[1] = true
                 input[2] = 'sum'
                 input[3] = false
+                input[4] = 'human'
                 """
             }
         }
@@ -188,6 +193,7 @@ nextflow_workflow {
                 input[1] = true
                 input[2] = 'sum'
                 input[3] = false
+                input[4] = 'human'
                 """
             }
         }
diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf
index 3c99deae..e8a7c0e2 100644
--- a/workflows/scdownstream.nf
+++ b/workflows/scdownstream.nf
@@ -44,6 +44,7 @@ workflow SCDOWNSTREAM {
     cell_cycle_scoring            //   value: boolean
     s_genes                       //    path: file or []
     g2m_genes                     //    path: file or []
+    species                       //   value: string
     qc_only                       //   value: boolean
     celldex_reference              //   value: string
     celltypist_model               //   value: string
@@ -129,6 +130,7 @@ workflow SCDOWNSTREAM {
             cell_cycle_scoring,
             s_genes,
             g2m_genes,
+            species,
         )
         ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.multiqc_files)
         ch_h5ad = QUALITY_CONTROL.out.h5ad

From cef4bcd725d7a3ca642682f74f4048cd938dec83 Mon Sep 17 00:00:00 2001
From: Nico Trummer <nictru32@gmail.com>
Date: Tue, 2 Jun 2026 08:04:58 +0200
Subject: [PATCH 19/19] Add analysis plan to extension test

---
 assets/schema_analysis_plan.json                     | 12 ++++++++----
 nf-test.config                                       |  1 +
 .../local/utils_nfcore_scdownstream_pipeline/main.nf |  1 +
 tests/analysis_plan_extension.csv                    |  2 ++
 tests/main_pipeline_extend.nf.test                   |  1 +
 tests/main_pipeline_extend.nf.test.snap              | 10 +---------
 6 files changed, 14 insertions(+), 13 deletions(-)
 create mode 100644 tests/analysis_plan_extension.csv

diff --git a/assets/schema_analysis_plan.json b/assets/schema_analysis_plan.json
index 92be8712..fdb44dbc 100644
--- a/assets/schema_analysis_plan.json
+++ b/assets/schema_analysis_plan.json
@@ -12,27 +12,31 @@
                 "pattern": "^\\S*$",
                 "default": null,
                 "errorMessage": "Integration name cannot contain spaces",
-                "description": "Integration method name, or empty to match all integrations"
+                "description": "Integration method name, or empty to match all integrations",
+                "meta": ["integration"]
             },
             "subset": {
                 "type": "string",
                 "pattern": "^\\S*$",
                 "default": null,
                 "errorMessage": "Subset cannot contain spaces",
-                "description": "Clustering subset (global or a label value), or empty to match all subsets"
+                "description": "Clustering subset (global or a label value), or empty to match all subsets",
+                "meta": ["subset"]
             },
             "resolution": {
                 "type": "number",
                 "minimum": 0,
                 "default": null,
-                "description": "Leiden resolution, or empty to match all resolutions"
+                "description": "Leiden resolution, or empty to match all resolutions",
+                "meta": ["resolution"]
             },
             "analyses": {
                 "type": "string",
                 "pattern": "^(|paga|liana|de|cytetype)(,(paga|liana|de|cytetype))*$",
                 "default": null,
                 "errorMessage": "Analyses must be a comma-separated list of paga, liana, de, and/or cytetype",
-                "description": "Downstream analyses to run for matching clusterings, or empty to run all analyses"
+                "description": "Downstream analyses to run for matching clusterings, or empty to run all analyses",
+                "meta": ["analyses"]
             }
         }
     }
diff --git a/nf-test.config b/nf-test.config
index 52203d1e..c56bd5d9 100644
--- a/nf-test.config
+++ b/nf-test.config
@@ -29,6 +29,7 @@ config {
         'nf-test.config',
         'tests/.nftignore',
         'tests/nextflow.config',
+        'tests/analysis_plan_extension.csv',
     ]
 
     // load the necessary plugins
diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
index 9e0cff62..4c98fc5c 100644
--- a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf
@@ -165,6 +165,7 @@ workflow PIPELINE_COMPLETION {
 def analysisPlanToList() {
     params.analysis_plan
         ? samplesheetToList(params.analysis_plan, "${projectDir}/assets/schema_analysis_plan.json")
+            .collect { row -> row[0] } // NOTE(review): presumably each row is [meta] because every schema field is tagged "meta"; row[0] keeps just that map - confirm against nf-schema
         : [[integration: null, subset: null, resolution: null, analyses: null]]
 }
 
diff --git a/tests/analysis_plan_extension.csv b/tests/analysis_plan_extension.csv
new file mode 100644
index 00000000..dc39c197
--- /dev/null
+++ b/tests/analysis_plan_extension.csv
@@ -0,0 +1,2 @@
+integration,subset,resolution,analyses
+scvi,global,0.5,"paga,liana,de"
diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test
index bc311d04..daa0dd57 100644
--- a/tests/main_pipeline_extend.nf.test
+++ b/tests/main_pipeline_extend.nf.test
@@ -18,6 +18,7 @@ nextflow_pipeline {
                 scvi_model          = pipelines_testdata_base_path + 'extension_base/model.pt'
                 symphony_reference   = pipelines_testdata_base_path + 'extension_base/symphony_reference.h5ad'
                 base_adata          = pipelines_testdata_base_path + 'extension_base/merged.h5ad'
+                analysis_plan       = "${projectDir}/tests/analysis_plan_extension.csv"
             }
         }
 
diff --git a/tests/main_pipeline_extend.nf.test.snap b/tests/main_pipeline_extend.nf.test.snap
index 400a7ed3..1a1be83b 100644
--- a/tests/main_pipeline_extend.nf.test.snap
+++ b/tests/main_pipeline_extend.nf.test.snap
@@ -184,18 +184,10 @@
                 "cluster_dimred/scvi",
                 "cluster_dimred/scvi/entropy",
                 "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png",
-                "cluster_dimred/scvi/entropy/scvi-global-1.0_entropy.png",
                 "cluster_dimred/scvi/leiden",
                 "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png",
-                "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png",
                 "cluster_dimred/scvi/umap",
                 "cluster_dimred/symphony",
-                "cluster_dimred/symphony/entropy",
-                "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png",
-                "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png",
-                "cluster_dimred/symphony/leiden",
-                "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png",
-                "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png",
                 "cluster_dimred/symphony/umap",
                 "combine",
                 "combine/integrate",
@@ -243,7 +235,7 @@
                 "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0"
             ]
         ],
-        "timestamp": "2026-05-29T14:41:14.623955124",
+        "timestamp": "2026-06-02T08:01:50.281977805",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "26.04.0"