From d06bb20d6faa5c5ea734fb48b188538d931acb83 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 19 May 2026 11:38:42 +0200 Subject: [PATCH 01/19] Add Symphony integration via symphonypy. Introduces SYMPHONY_INTEGRATE module and wires it into INTEGRATE as integration_methods symphony, with nf-test coverage and docs. Requires symphonypy 0.2.3 for harmonypy 0.2 compatibility (symphonypy#9). --- README.md | 1 + assets/multiqc_config.yml | 6 +- conf/modules.config | 9 +++ docs/output.md | 1 + docs/reproducibility.md | 35 ++++----- .../local/symphony/integrate/environment.yml | 9 +++ modules/local/symphony/integrate/main.nf | 34 +++++++++ .../symphony/integrate/templates/integrate.py | 61 +++++++++++++++ .../symphony/integrate/tests/main.nf.test | 70 +++++++++++++++++ .../integrate/tests/main.nf.test.snap | 41 ++++++++++ nextflow_schema.json | 6 +- subworkflows/local/integrate/main.nf | 12 +++ .../local/integrate/tests/main.nf.test | 75 +++++++++++++++++++ .../local/integrate/tests/main.nf.test.snap | 55 ++++++++++++++ 14 files changed, 393 insertions(+), 22 deletions(-) create mode 100644 modules/local/symphony/integrate/environment.yml create mode 100644 modules/local/symphony/integrate/main.nf create mode 100644 modules/local/symphony/integrate/templates/integrate.py create mode 100644 modules/local/symphony/integrate/tests/main.nf.test create mode 100644 modules/local/symphony/integrate/tests/main.nf.test.snap diff --git a/README.md b/README.md index e531a984..c620552c 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ Steps marked with the boat icon are not yet implemented. 
For the other steps, th - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html) - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html) - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) + - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/)) - [BBKNN](https://github.com/Teichlab/bbknn) - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html) - [Seurat](https://satijalab.org/seurat/articles/integration_introduction) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index bf1be27f..1af23e69 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -20,10 +20,12 @@ report_section_order: order: -1006 "harmony": order: -1007 - "bbknn": + "symphony": order: -1008 - "combat": + "bbknn": order: -1009 + "combat": + order: -1010 # If new tools are add. They need to be added here "nf-core-scdownstream-methods-description": order: -2001 diff --git a/conf/modules.config b/conf/modules.config index 84748e8e..cf2d1c0a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -388,6 +388,15 @@ process { ] } + withName: SYMPHONY_INTEGRATE { + publishDir = [ + path: { "${params.outdir}/combine/integrate/symphony" }, + mode: params.publish_dir_mode, + enabled: params.save_intermediates, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + withName: SCANPY_BBKNN { publishDir = [ path: { "${params.outdir}/combine/integrate/bbknn" }, diff --git a/docs/output.md b/docs/output.md index b35ab9d4..cc52a656 100644 --- a/docs/output.md +++ b/docs/output.md @@ -35,6 +35,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html) - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html) - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) + - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/)) - [BBKNN](https://github.com/Teichlab/bbknn) - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html) - [Seurat](https://satijalab.org/seurat/articles/integration_introduction) diff --git a/docs/reproducibility.md b/docs/reproducibility.md index 2e2e4312..cba5a394 100644 --- a/docs/reproducibility.md +++ b/docs/reproducibility.md @@ -120,23 +120,24 @@ The **Test strategy (this branch)** column describes what the tests on this bran ### `scanpy/` -| Module | Description | Reproducibility | Test strategy (this branch) | -| ------------------------ | -------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | -| `scanpy/bbknn` | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding. | Fully deterministic — kNN construction is deterministic given the input embedding. | structural — versions + schema only | -| `scanpy/cellcycle` | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase. 
| Fully deterministic | hash | -| `scanpy/combat` | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`. | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends. | structural — versions + schema only | -| `scanpy/filter` | Filters cells and genes by count, gene, and mitochondrial percentage thresholds. | Fully deterministic | hash + structural — standard `hash` triple; multiple parameter scenarios | -| `scanpy/harmony` | Runs Harmony batch integration after log-normalisation and PCA, storing the corrected embedding as `X_emb`. | **Non-deterministic** — Harmony is an iterative optimisation with no fixed seed; upstream PCA is also unseeded. | structural — versions + schema only; `variance_ratio` output removed | -| `scanpy/hvgs` | Selects highly variable genes and subsets the AnnData to those genes. | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions. | structural — versions + schema only | -| `scanpy/leiden` | Performs Leiden community-detection clustering at a specified resolution. | **Non-deterministic** — Leiden uses random restarts with no fixed seed. | structural — range assertion on cluster count + versions + schema | -| `scanpy/neighbors` | Computes a k-nearest-neighbour graph on a specified embedding. | Fully deterministic given a fixed input embedding. | structural — versions + schema only | -| `scanpy/paga` | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot. | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels. | hash | -| `scanpy/pca` | Runs PCA with `random_state=0` and stores the result under a specified key. | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends. 
| structural — versions + schema only | -| `scanpy/plotqc` | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC. | Fully deterministic | hash (no H5AD output — PNG / MultiQC JSON + versions) | -| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method. | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions. | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only** | -| `scanpy/readh5` | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD. | Fully deterministic | hash | -| `scanpy/sample` | Down-samples cells to a fixed count or fraction using `rng=0`. | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions. | hash | -| `scanpy/umap` | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`. | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions. 
| structural — versions + schema only | +| Module | Description | Reproducibility | Test strategy (this branch) | +| ------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | +| `scanpy/bbknn` | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding. | Fully deterministic — kNN construction is deterministic given the input embedding. | structural — versions + schema only | +| `scanpy/cellcycle` | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase. | Fully deterministic | hash | +| `scanpy/combat` | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`. | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends. | structural — versions + schema only | +| `scanpy/filter` | Filters cells and genes by count, gene, and mitochondrial percentage thresholds. | Fully deterministic | hash + structural — standard `hash` triple; multiple parameter scenarios | +| `scanpy/harmony` | Runs Harmony batch integration after log-normalisation and PCA, storing the corrected embedding as `X_emb`. | **Non-deterministic** — Harmony is an iterative optimisation with no fixed seed; upstream PCA is also unseeded. 
| structural — versions + schema only; `variance_ratio` output removed | +| `symphony/integrate` | Runs Symphony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — same Harmony backend as `scanpy/harmony`; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only (nf-test blocked until symphonypy 0.2.3 is on PyPI and the Wave image is rebuilt) | +| `scanpy/hvgs` | Selects highly variable genes and subsets the AnnData to those genes. | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions. | structural — versions + schema only | +| `scanpy/leiden` | Performs Leiden community-detection clustering at a specified resolution. | **Non-deterministic** — Leiden uses random restarts with no fixed seed. | structural — range assertion on cluster count + versions + schema | +| `scanpy/neighbors` | Computes a k-nearest-neighbour graph on a specified embedding. | Fully deterministic given a fixed input embedding. | structural — versions + schema only | +| `scanpy/paga` | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot. | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels. | hash | +| `scanpy/pca` | Runs PCA with `random_state=0` and stores the result under a specified key. | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends. | structural — versions + schema only | +| `scanpy/plotqc` | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC. 
| Fully deterministic | hash (no H5AD output — PNG / MultiQC JSON + versions) | +| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method. | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions. | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only** | +| `scanpy/readh5` | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD. | Fully deterministic | hash | +| `scanpy/sample` | Down-samples cells to a fixed count or fraction using `rng=0`. | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions. | hash | +| `scanpy/umap` | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`. | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions. | structural — versions + schema only | ### `scimilarity/` diff --git a/modules/local/symphony/integrate/environment.yml b/modules/local/symphony/integrate/environment.yml new file mode 100644 index 00000000..01ce15ca --- /dev/null +++ b/modules/local/symphony/integrate/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge +dependencies: + - conda-forge::python=3.13.12 + - conda-forge::pyyaml=6.0.3 + - conda-forge::scanpy=1.12.1 + - pip + - pip: + - symphonypy==0.2.3 diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/integrate/main.nf new file mode 100644 index 00000000..9c008451 --- /dev/null +++ b/modules/local/symphony/integrate/main.nf @@ -0,0 +1,34 @@ +process SYMPHONY_INTEGRATE { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/74/748cbcefde497c7024bda26cfe90aacbd7c3f3095084693157542a23de793fe2/data' + : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:604f5d2dde5f37d9'}" + + input: + tuple val(meta), path(h5ad) + val(batch_col) + val(counts_layer) + + output: + tuple val(meta), path("${prefix}.h5ad"), emit: h5ad + path "X_${prefix}.pkl" , emit: obsm + path "versions.yml" , emit: versions, topic: versions + + script: + prefix = task.ext.prefix ?: "${meta.id}" + if ("${prefix}.h5ad" == "${h5ad}") { + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + } + template('integrate.py') + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.h5ad + touch X_${prefix}.pkl + touch versions.yml + """ +} diff --git a/modules/local/symphony/integrate/templates/integrate.py b/modules/local/symphony/integrate/templates/integrate.py new file mode 100644 index 00000000..edb393b5 --- /dev/null +++ b/modules/local/symphony/integrate/templates/integrate.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 + +# Disable OpenMP CPU topology detection for MacOS compatibility +import os +os.environ["KMP_AFFINITY"] = "disabled" + +import platform +import yaml + +os.environ["MPLCONFIGDIR"] = "./tmp/mpl" +os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba" + +import scanpy as sc +import symphonypy as sp +import pandas as pd + +from threadpoolctl import threadpool_limits +threadpool_limits(int("${task.cpus}")) + +adata = sc.read_h5ad("${h5ad}") + +prefix = "${prefix}" + +adata_processing = adata.copy() + +if "${counts_layer}" != "X": + adata_processing.X = adata.layers["${counts_layer}"] + +sc.pp.log1p(adata_processing) +sc.pp.pca(adata_processing) + +sp.pp.harmony_integrate( + adata_processing, + key="${batch_col}", + flavor="python", + ref_basis_source="X_pca", + ref_basis_adjusted="X_pca_symphony", +) + +adata.obsm["X_pca_symphony"] = adata_processing.obsm["X_pca_symphony"] 
+adata.obsm["X_emb"] = adata_processing.obsm["X_pca_symphony"] +adata.uns["symphony"] = adata_processing.uns["harmony"] + +adata.write_h5ad(f"{prefix}.h5ad") + +df = pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names) +df.to_pickle(f"X_{prefix}.pkl") + +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "symphonypy": sp.__version__, + "pandas": pd.__version__, + } +} + +with open("versions.yml", "w") as f: + yaml.dump(versions, f) diff --git a/modules/local/symphony/integrate/tests/main.nf.test b/modules/local/symphony/integrate/tests/main.nf.test new file mode 100644 index 00000000..3fbc1e92 --- /dev/null +++ b/modules/local/symphony/integrate/tests/main.nf.test @@ -0,0 +1,70 @@ +nextflow_process { + + name "Test Process SYMPHONY_INTEGRATE" + script "modules/local/symphony/integrate/main.nf" + process "SYMPHONY_INTEGRATE" + + tag "modules" + tag "modules_local" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) + ] + ) + input[1] = "sample" + input[2] = "X" + """ + } + } + + then { + def adata = anndata(process.out.h5ad[0][1]) + assert process.success + assert "X_emb" in adata.obsm + assert "symphony" in adata.uns + assert snapshot( + path(process.out.versions[0]).yaml, + adata.yaml + ).match() + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) + ] + ) + input[1] = "sample" + input[2] = "X" + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} 
diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap b/modules/local/symphony/integrate/tests/main.nf.test.snap new file mode 100644 index 00000000..64d6e427 --- /dev/null +++ b/modules/local/symphony/integrate/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "h5ad": [ + [ + { + "id": "test" + }, + "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "obsm": [ + "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + } + ], + "timestamp": "2026-05-19T10:59:10.495670438", + "meta": { + "nf-test": "0.9.4", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index be1d18d2..bacbb557 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -152,8 +152,8 @@ "type": "string", "default": "scvi", "description": "Specify the tool to use for integration", - "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, bbknn, combat, seurat, scimilarity, pca, expimap", - "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$" + "help_text": "If you want to use multiple tools, separate them with a comma. 
Available methods are: scvi, scanvi, harmony, symphony, bbknn, combat, seurat, scimilarity, pca, expimap", + "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$" }, "integration_hvgs": { "type": "integer", @@ -228,7 +228,7 @@ "type": "string", "description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if `input` is not provided - otherwise it is ignored.", "help_text": "If the `input` parameter is not provided (no new data to add), integration will not be performed. In order to be able to utilize existing integration results, you need to provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).", - "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|bbknn|combat|seurat))*)?$" + "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat))*)?$" } } }, diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf index ebcce693..e790bf96 100644 --- a/subworkflows/local/integrate/main.nf +++ b/subworkflows/local/integrate/main.nf @@ -3,6 +3,7 @@ include { SCANPY_FILTER } from '../../../modules/local/scanpy/filter' include { SCVITOOLS_SCVI } from '../../../modules/local/scvitools/scvi' include { SCVITOOLS_SCANVI } from '../../../modules/local/scvitools/scanvi' include { SCANPY_HARMONY } from '../../../modules/local/scanpy/harmony' +include { SYMPHONY_INTEGRATE } from '../../../modules/local/symphony/integrate' include { SCANPY_BBKNN } from '../../../modules/local/scanpy/bbknn' include { SCANPY_COMBAT } from '../../../modules/local/scanpy/combat' include { SCANPY_PCA } from '../../../modules/local/scanpy/pca' @@ -115,6 +116,17 @@ workflow INTEGRATE { ch_obsm = ch_obsm.mix(SCANPY_HARMONY.out.obsm) } + if (methods.contains('symphony')) 
{ + SYMPHONY_INTEGRATE ( + ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] }, + "batch", + "X" + ) + ch_versions = ch_versions.mix(SYMPHONY_INTEGRATE.out.versions) + ch_integrations = ch_integrations.mix(SYMPHONY_INTEGRATE.out.h5ad) + ch_obsm = ch_obsm.mix(SYMPHONY_INTEGRATE.out.obsm) + } + if (methods.contains('bbknn')) { SCANPY_BBKNN ( ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'bbknn'], h5ad] }, diff --git a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test index 9562659e..60cb1fd2 100644 --- a/subworkflows/local/integrate/tests/main.nf.test +++ b/subworkflows/local/integrate/tests/main.nf.test @@ -82,6 +82,81 @@ nextflow_workflow { } + test("Should run without failures - symphony - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = channel.of([ + [id: 'test'], + file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true) + ]) + input[1] = false + input[2] = 2000 + input[3] = [] + input[4] = ['symphony'] + input[5] = null + input[6] = null + input[7] = [] + input[8] = [] + input[9] = null + input[10] = null + input[11] = 'condition' + """ + } + } + + then { + assert workflow.success + assert snapshot(workflow.out).match() + } + + } + + test("Should run without failures - symphony") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = channel.of([ + [id: 'test'], + file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true) + ]) + input[1] = false + input[2] = 2000 + input[3] = [] + input[4] = ['symphony'] + input[5] = null + input[6] = null + input[7] = [] + input[8] = [] + input[9] = null + input[10] = null + input[11] = 'condition' + """ + } + } + + then { + def adata = anndata(workflow.out.integrations[0][1]) + assert workflow.success + assert "X_emb" in adata.obsm + assert snapshot( + workflow.out.versions, + 
adata.yaml + ).match() + } + + } + test("Should run without failures - bbknn - stub") { options '-stub' diff --git a/subworkflows/local/integrate/tests/main.nf.test.snap b/subworkflows/local/integrate/tests/main.nf.test.snap index 4feacdc8..c3a97012 100644 --- a/subworkflows/local/integrate/tests/main.nf.test.snap +++ b/subworkflows/local/integrate/tests/main.nf.test.snap @@ -44,6 +44,61 @@ "nextflow": "26.04.0" } }, + "Should run without failures - symphony - stub": { + "content": [ + { + "0": [ + [ + { + "id": "symphony" + }, + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "integrations": [ + [ + { + "id": "symphony" + }, + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "obs": [ + + ], + "obsm": [ + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "var": [ + + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + } + ], + "timestamp": "2026-05-19T11:01:29.131789506", + "meta": { + "nf-test": "0.9.4", + "nextflow": "26.04.0" + } + }, "Should run without failures - bbknn - stub": { "content": [ { From 9177225030754ecfaed8e22f3575659768ace90f Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 26 May 2026 17:19:19 +0200 Subject: [PATCH 02/19] Fix Symphony module container build and version reporting. Update symphonypy to 0.2.4 with a rebuilt Wave image, and use importlib.metadata for package versions since symphonypy does not expose __version__. 
Co-authored-by: Cursor --- .../local/symphony/integrate/environment.yml | 5 +- modules/local/symphony/integrate/main.nf | 4 +- .../symphony/integrate/templates/integrate.py | 5 +- .../integrate/tests/main.nf.test.snap | 52 +++++++++++++++++++ 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/modules/local/symphony/integrate/environment.yml b/modules/local/symphony/integrate/environment.yml index 01ce15ca..1e4070eb 100644 --- a/modules/local/symphony/integrate/environment.yml +++ b/modules/local/symphony/integrate/environment.yml @@ -1,9 +1,10 @@ channels: - conda-forge + - bioconda dependencies: - - conda-forge::python=3.13.12 + - conda-forge::python=3.13.13 - conda-forge::pyyaml=6.0.3 - conda-forge::scanpy=1.12.1 - pip - pip: - - symphonypy==0.2.3 + - symphonypy==0.2.4 diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/integrate/main.nf index 9c008451..7eb4898f 100644 --- a/modules/local/symphony/integrate/main.nf +++ b/modules/local/symphony/integrate/main.nf @@ -4,8 +4,8 @@ process SYMPHONY_INTEGRATE { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/74/748cbcefde497c7024bda26cfe90aacbd7c3f3095084693157542a23de793fe2/data' - : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:604f5d2dde5f37d9'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/51/512121548a21b4d1bb8acfd5e30a75c5c2103ddd00cf1de4713c682b7e6b5387/data' + : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:2198c27c5c9392d5'}" input: tuple val(meta), path(h5ad) diff --git a/modules/local/symphony/integrate/templates/integrate.py b/modules/local/symphony/integrate/templates/integrate.py index edb393b5..32de73b5 100644 --- a/modules/local/symphony/integrate/templates/integrate.py +++ b/modules/local/symphony/integrate/templates/integrate.py @@ -4,6 +4,7 @@ import os os.environ["KMP_AFFINITY"] = "disabled" +import importlib.metadata import platform import yaml @@ -51,8 +52,8 @@ versions = { "${task.process}": { "python": platform.python_version(), - "scanpy": sc.__version__, - "symphonypy": sp.__version__, + "scanpy": importlib.metadata.version("scanpy"), + "symphonypy": importlib.metadata.version("symphonypy"), "pandas": pd.__version__, } } diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap b/modules/local/symphony/integrate/tests/main.nf.test.snap index 64d6e427..39b39257 100644 --- a/modules/local/symphony/integrate/tests/main.nf.test.snap +++ b/modules/local/symphony/integrate/tests/main.nf.test.snap @@ -37,5 +37,57 @@ "nf-test": "0.9.4", "nextflow": "26.04.0" } + }, + "Should run without failures": { + "content": [ + { + "SYMPHONY_INTEGRATE": { + "pandas": "2.3.3", + "python": "3.13.13", + "scanpy": "1.12.1", + "symphonypy": "0.2.4" + } + }, + { + "n_obs": 38234, + "n_vars": 9887, + "obs": { + "index": "_index", + "columns": [ + "sample" + ] + }, + "var": { + "index": "_index", + "columns": [ + + ] + }, + "layers": [ + + ], + "obsm": [ + "X_emb", + "X_pca_symphony" + ], + "varm": [ + + ], + "obsp": [ + + ], + "varp": [ + + ], + "uns": [ + "symphony" + ] + } + ], + "timestamp": "2026-05-26T17:19:00.719609", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.2" + } } } \ No newline at end of file From 
1f038c6f82729d58b0b4cb6044093afcbb3bdbf8 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 26 May 2026 17:38:01 +0200 Subject: [PATCH 03/19] Rename Symphony process to SYMPHONY_HARMONYINTEGRATE. Align the process name with symphonypy's harmony_integrate entry point and update references and nf-test snapshots. Co-authored-by: Cursor --- conf/modules.config | 2 +- modules/local/symphony/integrate/main.nf | 2 +- modules/local/symphony/integrate/tests/main.nf.test | 4 ++-- .../local/symphony/integrate/tests/main.nf.test.snap | 4 ++-- subworkflows/local/integrate/main.nf | 10 +++++----- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cf2d1c0a..5d1e3ec8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -388,7 +388,7 @@ process { ] } - withName: SYMPHONY_INTEGRATE { + withName: SYMPHONY_HARMONYINTEGRATE { publishDir = [ path: { "${params.outdir}/combine/integrate/symphony" }, mode: params.publish_dir_mode, diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/integrate/main.nf index 7eb4898f..93a0e116 100644 --- a/modules/local/symphony/integrate/main.nf +++ b/modules/local/symphony/integrate/main.nf @@ -1,4 +1,4 @@ -process SYMPHONY_INTEGRATE { +process SYMPHONY_HARMONYINTEGRATE { tag "${meta.id}" label 'process_medium' diff --git a/modules/local/symphony/integrate/tests/main.nf.test b/modules/local/symphony/integrate/tests/main.nf.test index 3fbc1e92..ea5cd9d0 100644 --- a/modules/local/symphony/integrate/tests/main.nf.test +++ b/modules/local/symphony/integrate/tests/main.nf.test @@ -1,8 +1,8 @@ nextflow_process { - name "Test Process SYMPHONY_INTEGRATE" + name "Test Process SYMPHONY_HARMONYINTEGRATE" script "modules/local/symphony/integrate/main.nf" - process "SYMPHONY_INTEGRATE" + process "SYMPHONY_HARMONYINTEGRATE" tag "modules" tag "modules_local" diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap 
b/modules/local/symphony/integrate/tests/main.nf.test.snap index 39b39257..6b03b90a 100644 --- a/modules/local/symphony/integrate/tests/main.nf.test.snap +++ b/modules/local/symphony/integrate/tests/main.nf.test.snap @@ -41,7 +41,7 @@ "Should run without failures": { "content": [ { - "SYMPHONY_INTEGRATE": { + "SYMPHONY_HARMONYINTEGRATE": { "pandas": "2.3.3", "python": "3.13.13", "scanpy": "1.12.1", @@ -84,7 +84,7 @@ ] } ], - "timestamp": "2026-05-26T17:19:00.719609", + "timestamp": "2026-05-26T17:36:49.568823", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.2" diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf index e790bf96..b89d4f72 100644 --- a/subworkflows/local/integrate/main.nf +++ b/subworkflows/local/integrate/main.nf @@ -3,7 +3,7 @@ include { SCANPY_FILTER } from '../../../modules/local/scanpy/filter' include { SCVITOOLS_SCVI } from '../../../modules/local/scvitools/scvi' include { SCVITOOLS_SCANVI } from '../../../modules/local/scvitools/scanvi' include { SCANPY_HARMONY } from '../../../modules/local/scanpy/harmony' -include { SYMPHONY_INTEGRATE } from '../../../modules/local/symphony/integrate' +include { SYMPHONY_HARMONYINTEGRATE } from '../../../modules/local/symphony/integrate' include { SCANPY_BBKNN } from '../../../modules/local/scanpy/bbknn' include { SCANPY_COMBAT } from '../../../modules/local/scanpy/combat' include { SCANPY_PCA } from '../../../modules/local/scanpy/pca' @@ -117,14 +117,14 @@ workflow INTEGRATE { } if (methods.contains('symphony')) { - SYMPHONY_INTEGRATE ( + SYMPHONY_HARMONYINTEGRATE ( ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] }, "batch", "X" ) - ch_versions = ch_versions.mix(SYMPHONY_INTEGRATE.out.versions) - ch_integrations = ch_integrations.mix(SYMPHONY_INTEGRATE.out.h5ad) - ch_obsm = ch_obsm.mix(SYMPHONY_INTEGRATE.out.obsm) + ch_versions = ch_versions.mix(SYMPHONY_HARMONYINTEGRATE.out.versions) + ch_integrations = ch_integrations.mix(SYMPHONY_HARMONYINTEGRATE.out.h5ad) + 
ch_obsm = ch_obsm.mix(SYMPHONY_HARMONYINTEGRATE.out.obsm) } if (methods.contains('bbknn')) { From 1808682450196a185cd253a984580a42c378269a Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 26 May 2026 19:52:28 +0200 Subject: [PATCH 04/19] Retire scanpy/harmony in favor of symphony/harmonyintegrate. Route the harmony integration method through SYMPHONY_HARMONYINTEGRATE, remove the duplicate symphony option, and delete the old scanpy/harmony module. Co-authored-by: Cursor --- README.md | 3 +- assets/multiqc_config.yml | 2 - conf/modules.config | 11 +- docs/output.md | 3 +- docs/reproducibility.md | 8 +- modules/local/scanpy/harmony/environment.yml | 8 - modules/local/scanpy/harmony/main.nf | 34 ---- .../local/scanpy/harmony/templates/harmony.py | 71 -------- .../local/scanpy/harmony/tests/main.nf.test | 69 ------- .../scanpy/harmony/tests/main.nf.test.snap | 92 ---------- .../environment.yml | 0 .../{integrate => harmonyintegrate}/main.nf | 10 +- .../templates/harmonyintegrate.py} | 0 .../tests/main.nf.test | 3 +- .../tests/main.nf.test.snap | 0 nextflow_schema.json | 6 +- subworkflows/local/integrate/main.nf | 56 +++--- .../local/integrate/tests/main.nf.test | 77 +------- .../local/integrate/tests/main.nf.test.snap | 168 ++++++++++-------- 19 files changed, 150 insertions(+), 471 deletions(-) delete mode 100644 modules/local/scanpy/harmony/environment.yml delete mode 100644 modules/local/scanpy/harmony/main.nf delete mode 100644 modules/local/scanpy/harmony/templates/harmony.py delete mode 100644 modules/local/scanpy/harmony/tests/main.nf.test delete mode 100644 modules/local/scanpy/harmony/tests/main.nf.test.snap rename modules/local/symphony/{integrate => harmonyintegrate}/environment.yml (100%) rename modules/local/symphony/{integrate => harmonyintegrate}/main.nf (72%) rename modules/local/symphony/{integrate/templates/integrate.py => harmonyintegrate/templates/harmonyintegrate.py} (100%) rename modules/local/symphony/{integrate => 
harmonyintegrate}/tests/main.nf.test (94%) rename modules/local/symphony/{integrate => harmonyintegrate}/tests/main.nf.test.snap (100%) diff --git a/README.md b/README.md index c620552c..8433a1fd 100644 --- a/README.md +++ b/README.md @@ -58,8 +58,7 @@ Steps marked with the boat icon are not yet implemented. For the other steps, th 3. Integration - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html) - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html) - - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) - - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/)) + - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/)) - [BBKNN](https://github.com/Teichlab/bbknn) - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html) - [Seurat](https://satijalab.org/seurat/articles/integration_introduction) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 1af23e69..945264e4 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -20,8 +20,6 @@ report_section_order: order: -1006 "harmony": order: -1007 - "symphony": - order: -1008 "bbknn": order: -1009 "combat": diff --git a/conf/modules.config b/conf/modules.config index 5d1e3ec8..41dacb79 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -379,18 +379,9 @@ process { ] } - withName: SCANPY_HARMONY { - publishDir = [ - path: { "${params.outdir}/combine/integrate/harmony" }, - mode: params.publish_dir_mode, - enabled: params.save_intermediates, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - ] - } - withName: SYMPHONY_HARMONYINTEGRATE { publishDir = [ - path: { "${params.outdir}/combine/integrate/symphony" }, + path: { "${params.outdir}/combine/integrate/harmony" }, mode: params.publish_dir_mode, enabled: params.save_intermediates, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, diff --git a/docs/output.md b/docs/output.md index cc52a656..ca948532 100644 --- a/docs/output.md +++ b/docs/output.md @@ -34,8 +34,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d 3. Integration - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html) - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html) - - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) - - [Symphony](https://symphonypy.readthedocs.io/) (via [symphonypy](https://pypi.org/project/symphonypy/)) + - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/)) - [BBKNN](https://github.com/Teichlab/bbknn) - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html) - [Seurat](https://satijalab.org/seurat/articles/integration_introduction) diff --git a/docs/reproducibility.md b/docs/reproducibility.md index cba5a394..814322dc 100644 --- a/docs/reproducibility.md +++ b/docs/reproducibility.md @@ -126,8 +126,6 @@ The **Test strategy (this branch)** column describes what the tests on this bran | `scanpy/cellcycle` | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase. | Fully deterministic | hash | | `scanpy/combat` | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`. | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends. 
| structural — versions + schema only | | `scanpy/filter` | Filters cells and genes by count, gene, and mitochondrial percentage thresholds. | Fully deterministic | hash + structural — standard `hash` triple; multiple parameter scenarios | -| `scanpy/harmony` | Runs Harmony batch integration after log-normalisation and PCA, storing the corrected embedding as `X_emb`. | **Non-deterministic** — Harmony is an iterative optimisation with no fixed seed; upstream PCA is also unseeded. | structural — versions + schema only; `variance_ratio` output removed | -| `symphony/integrate` | Runs Symphony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — same Harmony backend as `scanpy/harmony`; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only (nf-test blocked until symphonypy 0.2.3 is on PyPI and the Wave image is rebuilt) | | `scanpy/hvgs` | Selects highly variable genes and subsets the AnnData to those genes. | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions. | structural — versions + schema only | | `scanpy/leiden` | Performs Leiden community-detection clustering at a specified resolution. | **Non-deterministic** — Leiden uses random restarts with no fixed seed. | structural — range assertion on cluster count + versions + schema | | `scanpy/neighbors` | Computes a k-nearest-neighbour graph on a specified embedding. | Fully deterministic given a fixed input embedding. 
| structural — versions + schema only | @@ -139,6 +137,12 @@ The **Test strategy (this branch)** column describes what the tests on this bran | `scanpy/sample` | Down-samples cells to a fixed count or fraction using `rng=0`. | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions. | hash | | `scanpy/umap` | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`. | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions. | structural — versions + schema only | +### `symphony/` + +| Module | Description | Reproducibility | Test strategy (this branch) | +| ------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | +| `symphony/harmonyintegrate` | Runs Harmony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. 
| structural — versions + schema only | + ### `scimilarity/` | Module | Description | Reproducibility | Test strategy (this branch) | diff --git a/modules/local/scanpy/harmony/environment.yml b/modules/local/scanpy/harmony/environment.yml deleted file mode 100644 index c048fcad..00000000 --- a/modules/local/scanpy/harmony/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - conda-forge::python=3.13.12 - - bioconda::harmonypy=0.2.0 - - conda-forge::pyyaml=6.0.3 - - conda-forge::scanpy=1.12 diff --git a/modules/local/scanpy/harmony/main.nf b/modules/local/scanpy/harmony/main.nf deleted file mode 100644 index 315d1553..00000000 --- a/modules/local/scanpy/harmony/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process SCANPY_HARMONY { - tag "${meta.id}" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/45/45339bf761a2cf0cdb058492bc37f3df8b05b363731d491d1d3a14e9ba0b8f55/data' - : 'community.wave.seqera.io/library/harmonypy_anndata_leidenalg_numpy_pruned:43066d5f86f18261'}" - - input: - tuple val(meta), path(h5ad) - val(batch_col) - val(counts_layer) - - output: - tuple val(meta), path("${prefix}.h5ad"), emit: h5ad - path "X_${prefix}.pkl" , emit: obsm - path "versions.yml" , emit: versions, topic: versions - - script: - prefix = task.ext.prefix ?: "${meta.id}" - if ("${prefix}.h5ad" == "${h5ad}") { - error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
- } - template('harmony.py') - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.h5ad - touch X_${prefix}.pkl - touch versions.yml - """ -} diff --git a/modules/local/scanpy/harmony/templates/harmony.py b/modules/local/scanpy/harmony/templates/harmony.py deleted file mode 100644 index 61b307ef..00000000 --- a/modules/local/scanpy/harmony/templates/harmony.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 - -# Disable OpenMP CPU topology detection for MacOS compatibility -import os -os.environ["KMP_AFFINITY"] = "disabled" - -import platform -import yaml - -os.environ["MPLCONFIGDIR"] = "./tmp/mpl" -os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba" - -import harmonypy -import scanpy as sc -import pandas as pd - -from threadpoolctl import threadpool_limits -threadpool_limits(int("${task.cpus}")) - -adata = sc.read_h5ad("${h5ad}") - -prefix = "${prefix}" - -adata_processing = adata.copy() - -if "${counts_layer}" != "X": - adata_processing.X = adata.layers["${counts_layer}"] - -sc.pp.log1p(adata_processing) -sc.pp.pca(adata_processing) - -harmony_out = harmonypy.run_harmony( - adata_processing.obsm["X_pca"].astype("float64"), - adata_processing.obs, - "${batch_col}", -) - -emb = harmony_out.Z_corr - -# harmonypy 0.2.0 changed Z_corr orientation; accept either layout. -# See https://github.com/potulabe/symphonypy/issues/8 -if emb.shape == adata_processing.obsm["X_pca"].shape: - adata_processing.obsm["X_emb"] = emb -elif emb.T.shape == adata_processing.obsm["X_pca"].shape: - adata_processing.obsm["X_emb"] = emb.T -else: - raise ValueError( - f"Unexpected Harmony embedding shape {emb.shape}; " - f"expected {adata_processing.obsm['X_pca'].shape} or its transpose." 
- ) - -adata.obsm["X_emb"] = adata_processing.obsm["X_emb"] - -adata.write_h5ad(f"{prefix}.h5ad") - -df = pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names) -df.to_pickle(f"X_{prefix}.pkl") - -# Versions - -versions = { - "${task.process}": { - "python": platform.python_version(), - "scanpy": sc.__version__, - "harmonypy": harmonypy.__version__, - "pandas": pd.__version__ - } -} - -with open("versions.yml", "w") as f: - yaml.dump(versions, f) diff --git a/modules/local/scanpy/harmony/tests/main.nf.test b/modules/local/scanpy/harmony/tests/main.nf.test deleted file mode 100644 index 8c6b2478..00000000 --- a/modules/local/scanpy/harmony/tests/main.nf.test +++ /dev/null @@ -1,69 +0,0 @@ -nextflow_process { - - name "Test Process SCANPY_HARMONY" - script "modules/local/scanpy/harmony/main.nf" - process "SCANPY_HARMONY" - - tag "modules" - tag "modules_local" - - test("Should run without failures") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) - ] - ) - input[1] = "sample" - input[2] = "X" - """ - } - } - - then { - def adata = anndata(process.out.h5ad[0][1]) - assert process.success - assert "X_emb" in adata.obsm - assert snapshot( - path(process.out.versions[0]).yaml, - adata.yaml - ).match() - } - - } - - test("Should run without failures - stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) - ] - ) - input[1] = "sample" - input[2] = "X" - """ - } - } - - then { - assert process.success - assert snapshot(process.out).match() - } - - } - -} diff --git a/modules/local/scanpy/harmony/tests/main.nf.test.snap 
b/modules/local/scanpy/harmony/tests/main.nf.test.snap deleted file mode 100644 index 4a71e69d..00000000 --- a/modules/local/scanpy/harmony/tests/main.nf.test.snap +++ /dev/null @@ -1,92 +0,0 @@ -{ - "Should run without failures - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "2": [ - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "h5ad": [ - [ - { - "id": "test" - }, - "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "obsm": [ - "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "versions": [ - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - } - ], - "timestamp": "2026-03-22T10:56:43.269700775", - "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.2" - } - }, - "Should run without failures": { - "content": [ - { - "SCANPY_HARMONY": { - "harmonypy": "0.2.0", - "pandas": "2.3.3", - "python": "3.13.12", - "scanpy": "1.12" - } - }, - { - "n_obs": 38234, - "n_vars": 9887, - "obs": { - "index": "_index", - "columns": [ - "sample" - ] - }, - "var": { - "index": "_index", - "columns": [ - - ] - }, - "layers": [ - - ], - "obsm": [ - "X_emb" - ], - "varm": [ - - ], - "obsp": [ - - ], - "varp": [ - - ], - "uns": [ - - ] - } - ], - "timestamp": "2026-03-29T11:17:47.094134151", - "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.2" - } - } -} diff --git a/modules/local/symphony/integrate/environment.yml b/modules/local/symphony/harmonyintegrate/environment.yml similarity index 100% rename from modules/local/symphony/integrate/environment.yml rename to modules/local/symphony/harmonyintegrate/environment.yml diff --git a/modules/local/symphony/integrate/main.nf b/modules/local/symphony/harmonyintegrate/main.nf similarity index 72% rename from modules/local/symphony/integrate/main.nf rename to modules/local/symphony/harmonyintegrate/main.nf index 93a0e116..9a109b6a 100644 --- 
a/modules/local/symphony/integrate/main.nf +++ b/modules/local/symphony/harmonyintegrate/main.nf @@ -13,21 +13,23 @@ process SYMPHONY_HARMONYINTEGRATE { val(counts_layer) output: - tuple val(meta), path("${prefix}.h5ad"), emit: h5ad - path "X_${prefix}.pkl" , emit: obsm - path "versions.yml" , emit: versions, topic: versions + tuple val(meta), path("${prefix}.h5ad") , emit: h5ad + tuple val(meta), path("${prefix}_reference.h5ad"), emit: reference + path "X_${prefix}.pkl" , emit: obsm + path "versions.yml" , emit: versions, topic: versions script: prefix = task.ext.prefix ?: "${meta.id}" if ("${prefix}.h5ad" == "${h5ad}") { error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" } - template('integrate.py') + template('harmonyintegrate.py') stub: prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.h5ad + touch ${prefix}_reference.h5ad touch X_${prefix}.pkl touch versions.yml """ diff --git a/modules/local/symphony/integrate/templates/integrate.py b/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py similarity index 100% rename from modules/local/symphony/integrate/templates/integrate.py rename to modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py diff --git a/modules/local/symphony/integrate/tests/main.nf.test b/modules/local/symphony/harmonyintegrate/tests/main.nf.test similarity index 94% rename from modules/local/symphony/integrate/tests/main.nf.test rename to modules/local/symphony/harmonyintegrate/tests/main.nf.test index ea5cd9d0..52dc354e 100644 --- a/modules/local/symphony/integrate/tests/main.nf.test +++ b/modules/local/symphony/harmonyintegrate/tests/main.nf.test @@ -1,7 +1,7 @@ nextflow_process { name "Test Process SYMPHONY_HARMONYINTEGRATE" - script "modules/local/symphony/integrate/main.nf" + script "modules/local/symphony/harmonyintegrate/main.nf" process "SYMPHONY_HARMONYINTEGRATE" tag "modules" @@ -30,6 +30,7 @@ nextflow_process { def adata = anndata(process.out.h5ad[0][1]) 
assert process.success assert "X_emb" in adata.obsm + assert "X_pca_symphony" in adata.obsm assert "symphony" in adata.uns assert snapshot( path(process.out.versions[0]).yaml, diff --git a/modules/local/symphony/integrate/tests/main.nf.test.snap b/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap similarity index 100% rename from modules/local/symphony/integrate/tests/main.nf.test.snap rename to modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap diff --git a/nextflow_schema.json b/nextflow_schema.json index bacbb557..be1d18d2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -152,8 +152,8 @@ "type": "string", "default": "scvi", "description": "Specify the tool to use for integration", - "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, symphony, bbknn, combat, seurat, scimilarity, pca, expimap", - "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$" + "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, bbknn, combat, seurat, scimilarity, pca, expimap", + "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$" }, "integration_hvgs": { "type": "integer", @@ -228,7 +228,7 @@ "type": "string", "description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if `input` is not provided - otherwise it is ignored.", "help_text": "If the `input` parameter is not provided (no new data to add), integration will not be performed. 
In order to be able to utilize existing integration results, you need to provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).", - "pattern": "^((scvi|scanvi|harmony|symphony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|symphony|bbknn|combat|seurat))*)?$" + "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|bbknn|combat|seurat))*)?$" } } }, diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf index b89d4f72..7af3e357 100644 --- a/subworkflows/local/integrate/main.nf +++ b/subworkflows/local/integrate/main.nf @@ -2,8 +2,8 @@ include { SCANPY_HVGS } from '../../../modules/local/scanpy/hvgs' include { SCANPY_FILTER } from '../../../modules/local/scanpy/filter' include { SCVITOOLS_SCVI } from '../../../modules/local/scvitools/scvi' include { SCVITOOLS_SCANVI } from '../../../modules/local/scvitools/scanvi' -include { SCANPY_HARMONY } from '../../../modules/local/scanpy/harmony' -include { SYMPHONY_HARMONYINTEGRATE } from '../../../modules/local/symphony/integrate' +include { SYMPHONY_HARMONYINTEGRATE } from '../../../modules/local/symphony/harmonyintegrate' +include { SYMPHONY_MAPEMBEDDING } from '../../../modules/local/symphony/mapembedding' include { SCANPY_BBKNN } from '../../../modules/local/scanpy/bbknn' include { SCANPY_COMBAT } from '../../../modules/local/scanpy/combat' include { SCANPY_PCA } from '../../../modules/local/scanpy/pca' @@ -24,10 +24,12 @@ workflow INTEGRATE { scvi_categorical_covariates // list of string scvi_continuous_covariates // list of string scimilarity_model // path + harmony_reference // path expimap_gmt // path condition_col // string main: + ch_versions = channel.empty() ch_obs = channel.empty() ch_var = channel.empty() ch_obsm = channel.empty() @@ -41,6 +43,7 @@ workflow INTEGRATE { n_hvgs, excluded_genes ) + ch_versions = ch_versions.mix(SCANPY_HVGS.out.versions) ch_h5ad_hvg = SCANPY_HVGS.out.h5ad // See issue 215 @@ 
-60,6 +63,7 @@ workflow INTEGRATE { [] ) ch_h5ad_hvg = SCANPY_FILTER.out.h5ad + ch_versions = ch_versions.mix(SCANPY_FILTER.out.versions) } else { ch_h5ad_hvg = ch_h5ad @@ -69,6 +73,7 @@ workflow INTEGRATE { SEURAT_INTEGRATION ( ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'seurat'], h5ad] }, "batch" ) + ch_versions = ch_versions.mix(SEURAT_INTEGRATION.out.versions) ch_integrations = ch_integrations.mix(SEURAT_INTEGRATION.out.h5ad) } @@ -83,6 +88,7 @@ workflow INTEGRATE { scvi_categorical_covariates, scvi_continuous_covariates, ) + ch_versions = ch_versions.mix(SCVITOOLS_SCVI.out.versions) ch_integrations = ch_integrations.mix(SCVITOOLS_SCVI.out.h5ad) ch_obsm = ch_obsm.mix(SCVITOOLS_SCVI.out.obsm) } @@ -101,30 +107,34 @@ workflow INTEGRATE { scvi_categorical_covariates, scvi_continuous_covariates, ) + ch_versions = ch_versions.mix(SCVITOOLS_SCANVI.out.versions) ch_integrations = ch_integrations.mix(SCVITOOLS_SCANVI.out.h5ad) ch_obs = ch_obs.mix(SCVITOOLS_SCANVI.out.obs) ch_obsm = ch_obsm.mix(SCVITOOLS_SCANVI.out.obsm) } if (methods.contains('harmony')) { - SCANPY_HARMONY ( - ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'harmony'], h5ad] }, - "batch", - "X" - ) - ch_integrations = ch_integrations.mix(SCANPY_HARMONY.out.h5ad) - ch_obsm = ch_obsm.mix(SCANPY_HARMONY.out.obsm) - } - - if (methods.contains('symphony')) { - SYMPHONY_HARMONYINTEGRATE ( - ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] }, - "batch", - "X" - ) - ch_versions = ch_versions.mix(SYMPHONY_HARMONYINTEGRATE.out.versions) - ch_integrations = ch_integrations.mix(SYMPHONY_HARMONYINTEGRATE.out.h5ad) - ch_obsm = ch_obsm.mix(SYMPHONY_HARMONYINTEGRATE.out.obsm) + if (harmony_reference) { + SYMPHONY_MAPEMBEDDING ( + ch_h5ad.map { _meta, h5ad -> [[id: 'harmony'], h5ad] }, + channel.value([[id: 'harmony'], harmony_reference]), + "batch", + "X" + ) + ch_versions = ch_versions.mix(SYMPHONY_MAPEMBEDDING.out.versions) + ch_integrations = ch_integrations.mix(SYMPHONY_MAPEMBEDDING.out.h5ad) + ch_obsm = 
ch_obsm.mix(SYMPHONY_MAPEMBEDDING.out.obsm) + } + else { + SYMPHONY_HARMONYINTEGRATE ( + ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'harmony'], h5ad] }, + "batch", + "X" + ) + ch_versions = ch_versions.mix(SYMPHONY_HARMONYINTEGRATE.out.versions) + ch_integrations = ch_integrations.mix(SYMPHONY_HARMONYINTEGRATE.out.h5ad) + ch_obsm = ch_obsm.mix(SYMPHONY_HARMONYINTEGRATE.out.obsm) + } } if (methods.contains('bbknn')) { @@ -132,6 +142,7 @@ workflow INTEGRATE { ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'bbknn'], h5ad] }, "batch" ) + ch_versions = ch_versions.mix(SCANPY_BBKNN.out.versions) ch_integrations = ch_integrations.mix(SCANPY_BBKNN.out.h5ad) } @@ -140,6 +151,7 @@ workflow INTEGRATE { ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'combat'], h5ad] }, "batch" ) + ch_versions = ch_versions.mix(SCANPY_COMBAT.out.versions) ch_integrations = ch_integrations.mix(SCANPY_COMBAT.out.h5ad) ch_obsm = ch_obsm.mix(SCANPY_COMBAT.out.obsm) } @@ -149,6 +161,7 @@ workflow INTEGRATE { ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'pca'], h5ad] }, "X_emb" ) + ch_versions = ch_versions.mix(SCANPY_PCA.out.versions) ch_integrations = ch_integrations.mix(SCANPY_PCA.out.h5ad) ch_obsm = ch_obsm.mix(SCANPY_PCA.out.obsm) } @@ -162,6 +175,7 @@ workflow INTEGRATE { condition_col, "X" ) + ch_versions = ch_versions.mix(SCARCHES_EXPIMAP.out.versions) ch_integrations = ch_integrations.mix(SCARCHES_EXPIMAP.out.h5ad) ch_obsm = ch_obsm.mix(SCARCHES_EXPIMAP.out.obsm) } @@ -171,6 +185,7 @@ workflow INTEGRATE { ch_h5ad.map { _meta, h5ad -> [[id: 'scimilarity'], h5ad] }, scimilarity_model, ) + ch_versions = ch_versions.mix(SCIMILARITY.out.versions) ch_integrations = ch_integrations.mix(SCIMILARITY.out.integrations) ch_obs = ch_obs.mix(SCIMILARITY.out.obs) ch_obsm = ch_obsm.mix(SCIMILARITY.out.obsm) @@ -181,4 +196,5 @@ workflow INTEGRATE { obs = ch_obs // channel: [ pkl ] var = ch_var // channel: [ pkl ] obsm = ch_obsm // channel: [ pkl ] + versions = ch_versions // channel: [ versions.yml ] } diff --git 
a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test index 60cb1fd2..0a59e5fb 100644 --- a/subworkflows/local/integrate/tests/main.nf.test +++ b/subworkflows/local/integrate/tests/main.nf.test @@ -74,81 +74,8 @@ nextflow_workflow { def adata = anndata(workflow.out.integrations[0][1]) assert workflow.success assert "X_emb" in adata.obsm - assert snapshot( - workflow.out.versions, - adata.yaml - ).match() - } - - } - - test("Should run without failures - symphony - stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - workflow { - """ - input[0] = channel.of([ - [id: 'test'], - file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true) - ]) - input[1] = false - input[2] = 2000 - input[3] = [] - input[4] = ['symphony'] - input[5] = null - input[6] = null - input[7] = [] - input[8] = [] - input[9] = null - input[10] = null - input[11] = 'condition' - """ - } - } - - then { - assert workflow.success - assert snapshot(workflow.out).match() - } - - } - - test("Should run without failures - symphony") { - - when { - params { - outdir = "$outputDir" - } - workflow { - """ - input[0] = channel.of([ - [id: 'test'], - file(params.pipelines_testdata_base_path + '/anndata-variations/batch_correct_name.h5ad', checkIfExists: true) - ]) - input[1] = false - input[2] = 2000 - input[3] = [] - input[4] = ['symphony'] - input[5] = null - input[6] = null - input[7] = [] - input[8] = [] - input[9] = null - input[10] = null - input[11] = 'condition' - """ - } - } - - then { - def adata = anndata(workflow.out.integrations[0][1]) - assert workflow.success - assert "X_emb" in adata.obsm + assert "X_pca_symphony" in adata.obsm + assert "symphony" in adata.uns assert snapshot( workflow.out.versions, adata.yaml diff --git a/subworkflows/local/integrate/tests/main.nf.test.snap b/subworkflows/local/integrate/tests/main.nf.test.snap index c3a97012..17d238af 100644 --- 
a/subworkflows/local/integrate/tests/main.nf.test.snap +++ b/subworkflows/local/integrate/tests/main.nf.test.snap @@ -19,51 +19,6 @@ "3": [ "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], - "integrations": [ - [ - { - "id": "harmony" - }, - "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "obs": [ - - ], - "obsm": [ - "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "var": [ - - ] - } - ], - "timestamp": "2026-05-20T20:47:09.819743733", - "meta": { - "nf-test": "0.9.4", - "nextflow": "26.04.0" - } - }, - "Should run without failures - symphony - stub": { - "content": [ - { - "0": [ - [ - { - "id": "symphony" - }, - "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" - ], "4": [ "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -72,16 +27,16 @@ "integrations": [ [ { - "id": "symphony" + "id": "harmony" }, - "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "obs": [ ], "obsm": [ - "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], "var": [ @@ -93,10 +48,10 @@ ] } ], - "timestamp": "2026-05-19T11:01:29.131789506", + "timestamp": "2026-03-28T23:05:37.694952307", "meta": { "nf-test": "0.9.4", - "nextflow": "26.04.0" + "nextflow": "25.10.2" } }, "Should run without failures - bbknn - stub": { @@ -118,6 +73,11 @@ ], "3": [ + ], + "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ], "integrations": [ [ @@ -135,21 +95,30 @@ ], "var": [ + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": 
"2026-05-20T20:49:35.863158377", + "timestamp": "2026-03-25T15:47:40.215608271", "meta": { "nf-test": "0.9.4", - "nextflow": "26.04.0" + "nextflow": "25.10.2" } }, "Should run without failures - combat": { "content": [ - null, + [ + "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae", + "versions.yml:md5,a6c1e0a77e0d31423a9d77edba85127d", + "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823" + ], { "n_obs": 12940, - "n_vars": 2077, + "n_vars": 2000, "obs": { "index": "_index", "columns": [ @@ -209,7 +178,7 @@ ] } ], - "timestamp": "2026-05-20T20:52:30.761335469", + "timestamp": "2026-05-28T14:03:08.524818368", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" @@ -235,6 +204,11 @@ "3": [ "X_pca_pca.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], + "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], "integrations": [ [ { @@ -251,13 +225,18 @@ ], "var": [ + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": "2026-05-20T20:53:05.651566126", + "timestamp": "2026-04-10T16:50:54.506012382", "meta": { "nf-test": "0.9.4", - "nextflow": "26.04.0" + "nextflow": "25.10.2" } }, "Should run without failures - extension mode - stub": { @@ -280,6 +259,9 @@ "3": [ "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], + "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], "integrations": [ [ { @@ -296,21 +278,28 @@ ], "var": [ + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": "2026-05-20T20:52:48.773987405", + "timestamp": "2026-03-28T23:09:49.392744851", "meta": { "nf-test": "0.9.4", - "nextflow": "26.04.0" + "nextflow": "25.10.2" } }, "Should run without failures - pca": { "content": [ - null, + [ + 
"versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae", + "versions.yml:md5,87a2cb96724430656d9c1276e91e0208", + "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823" + ], { "n_obs": 12940, - "n_vars": 2077, + "n_vars": 2000, "obs": { "index": "_index", "columns": [ @@ -368,7 +357,7 @@ ] } ], - "timestamp": "2026-05-20T20:53:47.047398518", + "timestamp": "2026-05-28T14:04:36.10115423", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" @@ -394,6 +383,11 @@ "3": [ "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], + "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], "integrations": [ [ { @@ -410,21 +404,30 @@ ], "var": [ + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": "2026-05-20T20:51:21.560122268", + "timestamp": "2026-03-25T15:49:26.334091777", "meta": { "nf-test": "0.9.4", - "nextflow": "26.04.0" + "nextflow": "25.10.2" } }, "Should run without failures - harmony": { "content": [ - null, + [ + "versions.yml:md5,0941a4daea5c41d9e3259be11e9f2263", + "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae", + "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823" + ], { "n_obs": 12940, - "n_vars": 2077, + "n_vars": 2000, "obs": { "index": "_index", "columns": [ @@ -464,7 +467,8 @@ "counts" ], "obsm": [ - "X_emb" + "X_emb", + "X_symphony" ], "varm": [ @@ -481,7 +485,7 @@ ] } ], - "timestamp": "2026-05-20T20:48:46.532961357", + "timestamp": "2026-05-28T14:18:52.042984469", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" @@ -489,10 +493,14 @@ }, "Should run without failures - bbknn": { "content": [ - null, + [ + "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae", + "versions.yml:md5,ccf730637c4c61a84ac4a002bf9832e0", + "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823" + ], { "n_obs": 12940, - 
"n_vars": 2077, + "n_vars": 2000, "obs": { "index": "_index", "columns": [ @@ -552,7 +560,7 @@ ] } ], - "timestamp": "2026-05-20T20:51:00.345519561", + "timestamp": "2026-05-28T14:01:44.359301169", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" @@ -560,7 +568,9 @@ }, "Should run without failures - expimap": { "content": [ - null, + [ + "versions.yml:md5,44d9a1bbfabdc0ecae8adc586c7c1b2d" + ], { "n_obs": 12940, "n_vars": 9887, @@ -596,10 +606,10 @@ ] } ], - "timestamp": "2026-05-20T20:55:15.288341932", + "timestamp": "2026-04-11T15:55:42.640248171", "meta": { "nf-test": "0.9.4", - "nextflow": "26.04.0" + "nextflow": "25.10.4" } }, "Should run without failures - expimap - stub": { @@ -622,6 +632,9 @@ "3": [ "X_expimap.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], + "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], "integrations": [ [ { @@ -638,13 +651,16 @@ ], "var": [ + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": "2026-05-20T20:54:13.304991035", + "timestamp": "2026-04-11T09:14:13.34716941", "meta": { "nf-test": "0.9.4", - "nextflow": "26.04.0" + "nextflow": "25.10.2" } } } \ No newline at end of file From 23109e826a27d99ebe51edea34b6237e86e80bdc Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Thu, 28 May 2026 14:45:44 +0200 Subject: [PATCH 05/19] Add Symphony reference mapping support --- .gitignore | 3 + conf/modules.config | 16 ++++ contrib/nf-core-test-datasets/.gitattributes | 1 + contrib/nf-core-test-datasets/README.md | 49 ++++++++++ .../nf-core-test-datasets/build.params.json | 8 ++ .../collect-artifacts.sh | 15 +++ docs/output.md | 1 + docs/reproducibility.md | 39 ++++---- docs/usage.md | 12 ++- main.nf | 7 ++ modules/local/scanpy/hvgs/templates/hvgs.py | 1 + .../local/scanpy/hvgs/tests/main.nf.test.snap | 44 ++++----- modules/local/scanpy/pca/templates/pca.py | 3 +- .../local/scanpy/pca/tests/main.nf.test.snap | 17 ++-- .../templates/harmonyintegrate.py | 75 ++++++++++----- 
.../harmonyintegrate/tests/main.nf.test | 13 ++- .../harmonyintegrate/tests/main.nf.test.snap | 67 +++++++++++-- .../symphony/mapembedding/environment.yml | 10 ++ modules/local/symphony/mapembedding/main.nf | 35 +++++++ .../mapembedding/templates/map_embedding.py | 59 ++++++++++++ .../symphony/mapembedding/tests/main.nf.test | 93 +++++++++++++++++++ .../mapembedding/tests/main.nf.test.snap | 93 +++++++++++++++++++ nextflow.config | 1 + nextflow_schema.json | 8 ++ subworkflows/local/combine/main.nf | 2 + .../local/integrate/tests/main.nf.test | 36 ++++--- .../main.nf | 8 +- tests/main_pipeline_extend.nf.test | 5 +- tests/main_pipeline_extend.nf.test.snap | 21 ++++- tests/main_pipeline_reference_mapping.nf.test | 5 +- ...in_pipeline_reference_mapping.nf.test.snap | 21 ++++- workflows/scdownstream.nf | 2 + 32 files changed, 656 insertions(+), 114 deletions(-) create mode 100644 contrib/nf-core-test-datasets/.gitattributes create mode 100644 contrib/nf-core-test-datasets/README.md create mode 100644 contrib/nf-core-test-datasets/build.params.json create mode 100755 contrib/nf-core-test-datasets/collect-artifacts.sh create mode 100644 modules/local/symphony/mapembedding/environment.yml create mode 100644 modules/local/symphony/mapembedding/main.nf create mode 100644 modules/local/symphony/mapembedding/templates/map_embedding.py create mode 100644 modules/local/symphony/mapembedding/tests/main.nf.test create mode 100644 modules/local/symphony/mapembedding/tests/main.nf.test.snap diff --git a/.gitignore b/.gitignore index 5516a066..7c60a3c2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,6 @@ testing* null/ .lineage/ .nf-test* +tests/assets/*.h5ad +contrib/nf-core-test-datasets/build_output/ +contrib/nf-core-test-datasets/extension_base/ diff --git a/conf/modules.config b/conf/modules.config index 41dacb79..da4a96cb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -380,6 +380,22 @@ process { } withName: SYMPHONY_HARMONYINTEGRATE { + publishDir = [ + path: 
{ "${params.outdir}/combine/integrate/harmony" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.endsWith('_reference.h5ad')) { + return 'harmony_reference.h5ad' + } + if (params.save_intermediates && !filename.equals('versions.yml')) { + return filename + } + return null + }, + ] + } + + withName: SYMPHONY_MAPEMBEDDING { publishDir = [ path: { "${params.outdir}/combine/integrate/harmony" }, mode: params.publish_dir_mode, diff --git a/contrib/nf-core-test-datasets/.gitattributes b/contrib/nf-core-test-datasets/.gitattributes new file mode 100644 index 00000000..914d733f --- /dev/null +++ b/contrib/nf-core-test-datasets/.gitattributes @@ -0,0 +1 @@ +extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text diff --git a/contrib/nf-core-test-datasets/README.md b/contrib/nf-core-test-datasets/README.md new file mode 100644 index 00000000..b9b58a05 --- /dev/null +++ b/contrib/nf-core-test-datasets/README.md @@ -0,0 +1,49 @@ +# nf-core/test-datasets update — `scdownstream/extension_base` + +Copy the contents of `extension_base/` into the **`scdownstream` branch** of [nf-core/test-datasets](https://github.com/nf-core/test-datasets), replacing the existing files in `scdownstream/extension_base/`. + +## Files + +| File | Purpose | +| --------------------------------------- | ------------------------------------------------------------ | +| `extension_base/model.pt` | scVI checkpoint for reference mapping / extension | +| `extension_base/merged.h5ad` | Finalized atlas (`base_adata`) for extension | +| `extension_base/harmony_reference.h5ad` | Symphony reference for Harmony reference mapping / extension | + +All three must come from the **same pipeline run** (see below). + +> **Note:** If `extension_base/` already contains files but you have not run `collect-artifacts.sh` yet, `merged.h5ad` and `model.pt` may still be the current test-datasets versions and `harmony_reference.h5ad` from a Harmony-only build. 
**Do not open the test-datasets PR until you have run a unified build and `collect-artifacts.sh`** — all three files must be replaced together. + +## How these were generated + +```bash +# From the scdownstream repo root, with nf-core conda env active: +nextflow run main.nf -profile test,apptainer -params-file contrib/nf-core-test-datasets/build.params.json + +# Populate extension_base/ from the build output: +./contrib/nf-core-test-datasets/collect-artifacts.sh +``` + +Build parameters match the consolidated pipeline tests (`main_pipeline_reference_mapping.nf.test`, `main_pipeline_extend.nf.test`): + +- Input: `samplesheet.csv` (full atlas) +- Integration: `scvi,harmony` +- HVGs: 500 + +## PR checklist (test-datasets repo) + +1. Check out branch `scdownstream`. +2. Ensure Git LFS is enabled (`git lfs install`). +3. Copy `extension_base/*` into `scdownstream/extension_base/` (overwrite `model.pt` and `merged.h5ad`, add `harmony_reference.h5ad`). +4. Add or extend `.gitattributes` on the test-datasets repo: + + ``` + scdownstream/extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text + ``` + +5. Commit and open PR against `scdownstream`. +6. After merge, re-run `nftu` on the scdownstream pipeline reference-mapping and extend tests. + +## Pipeline version + +Record the scdownstream commit/tag used when generating these files in your PR description. 
diff --git a/contrib/nf-core-test-datasets/build.params.json b/contrib/nf-core-test-datasets/build.params.json new file mode 100644 index 00000000..8faa4a16 --- /dev/null +++ b/contrib/nf-core-test-datasets/build.params.json @@ -0,0 +1,8 @@ +{ + "input": "https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet.csv", + "integration_methods": "scvi,harmony", + "integration_hvgs": 500, + "doublet_detection": "scrublet,scdblfinder", + "celltypist_model": "Adult_COVID19_PBMC", + "outdir": "contrib/nf-core-test-datasets/build_output" +} diff --git a/contrib/nf-core-test-datasets/collect-artifacts.sh b/contrib/nf-core-test-datasets/collect-artifacts.sh new file mode 100755 index 00000000..75fa3d4e --- /dev/null +++ b/contrib/nf-core-test-datasets/collect-artifacts.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +OUT="${ROOT}/contrib/nf-core-test-datasets/build_output" +DEST="${ROOT}/contrib/nf-core-test-datasets/extension_base" + +mkdir -p "${DEST}" + +cp "${OUT}/combine/integrate/scvi/scvi_model/model.pt" "${DEST}/model.pt" +cp "${OUT}/finalized/merged.h5ad" "${DEST}/merged.h5ad" +cp "${OUT}/combine/integrate/harmony/harmony_reference.h5ad" "${DEST}/harmony_reference.h5ad" + +echo "Collected artifacts into ${DEST}:" +ls -lh "${DEST}" diff --git a/docs/output.md b/docs/output.md index ca948532..38d7356b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -98,6 +98,7 @@ The `preprocess` directory contains a subdirectory for each sample, which contai - `${tool}` - `*.h5ad/*.rds`: The integrated H5AD or RDS file. - `X_${tool}.pkl`: Low-dimensional representation of the integrated data. + - `harmony_reference.h5ad` (Harmony only): Compact Symphony reference AnnData for query mapping, published from de novo Harmony runs. 
diff --git a/docs/reproducibility.md b/docs/reproducibility.md index 814322dc..78769582 100644 --- a/docs/reproducibility.md +++ b/docs/reproducibility.md @@ -120,28 +120,29 @@ The **Test strategy (this branch)** column describes what the tests on this bran ### `scanpy/` -| Module | Description | Reproducibility | Test strategy (this branch) | -| ------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | -| `scanpy/bbknn` | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding. | Fully deterministic — kNN construction is deterministic given the input embedding. | structural — versions + schema only | -| `scanpy/cellcycle` | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase. | Fully deterministic | hash | -| `scanpy/combat` | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`. | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends. | structural — versions + schema only | -| `scanpy/filter` | Filters cells and genes by count, gene, and mitochondrial percentage thresholds. | Fully deterministic | hash + structural — standard `hash` triple; multiple parameter scenarios | -| `scanpy/hvgs` | Selects highly variable genes and subsets the AnnData to those genes. 
| Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions. | structural — versions + schema only | -| `scanpy/leiden` | Performs Leiden community-detection clustering at a specified resolution. | **Non-deterministic** — Leiden uses random restarts with no fixed seed. | structural — range assertion on cluster count + versions + schema | -| `scanpy/neighbors` | Computes a k-nearest-neighbour graph on a specified embedding. | Fully deterministic given a fixed input embedding. | structural — versions + schema only | -| `scanpy/paga` | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot. | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels. | hash | -| `scanpy/pca` | Runs PCA with `random_state=0` and stores the result under a specified key. | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends. | structural — versions + schema only | -| `scanpy/plotqc` | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC. | Fully deterministic | hash (no H5AD output — PNG / MultiQC JSON + versions) | -| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method. | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions. | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only** | -| `scanpy/readh5` | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD. | Fully deterministic | hash | -| `scanpy/sample` | Down-samples cells to a fixed count or fraction using `rng=0`. | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions. 
| hash | -| `scanpy/umap` | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`. | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions. | structural — versions + schema only | +| Module | Description | Reproducibility | Test strategy (this branch) | +| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------- | +| `scanpy/bbknn` | Constructs a batch-balanced k-nearest-neighbour graph (BBKNN) on a PCA embedding. | Fully deterministic — kNN construction is deterministic given the input embedding. | structural — versions + schema only | +| `scanpy/cellcycle` | Scores each cell for S-phase and G2M-phase activity and assigns a predicted cell cycle phase. | Fully deterministic | hash | +| `scanpy/combat` | Applies ComBat batch correction and then runs PCA, storing the result as `X_emb`. | Seeded / quasi-deterministic — ComBat is deterministic; downstream PCA floats may vary across LAPACK backends. | structural — versions + schema only | +| `scanpy/filter` | Filters cells and genes by count, gene, and mitochondrial percentage thresholds. | Fully deterministic | hash + structural — standard `hash` triple; multiple parameter scenarios | +| `scanpy/hvgs` | Normalizes counts (`normalize_total` → `log1p`), selects highly variable genes, and subsets the AnnData to those genes while keeping raw counts in `X`. | Seeded / quasi-deterministic — HVG variance statistics rely on NumPy/SciPy floating-point operations that can produce slightly different results across library versions. 
| structural — versions + schema only | +| `scanpy/leiden` | Performs Leiden community-detection clustering at a specified resolution. | **Non-deterministic** — Leiden uses random restarts with no fixed seed. | structural — range assertion on cluster count + versions + schema | +| `scanpy/neighbors` | Computes a k-nearest-neighbour graph on a specified embedding. | Fully deterministic given a fixed input embedding. | structural — versions + schema only | +| `scanpy/paga` | Computes PAGA coarse-grained cluster connectivity and saves a graph and plot. | Fully deterministic — PAGA is a deterministic graph-summarisation step given fixed Leiden labels. | hash | +| `scanpy/pca` | Runs library-size normalization, log1p, and PCA with `random_state=0`, storing the result under a specified key. | Seeded / quasi-deterministic — seed is fixed, but float coordinates can differ across LAPACK/MKL backends. Embeddings differ from earlier pipeline versions that ran PCA on unnormalized counts. | structural — versions + schema only | +| `scanpy/plotqc` | Calculates QC metrics and produces a counts-vs-genes scatter plot for MultiQC. | Fully deterministic | hash (no H5AD output — PNG / MultiQC JSON + versions) | +| `scanpy/rankgenesgroups` | Runs differential gene expression (rank genes groups) across clusters using a configurable statistical method. | **Seeded / quasi-deterministic** — wilcoxon and t-test are deterministic in theory, but tied-rank handling and floating-point tie-breaking can differ across SciPy versions. | structural — versions + `adata.yaml`; one path with **empty h5ad** snapshots **versions only** | +| `scanpy/readh5` | Reads a 10x Genomics HDF5 (`.h5`) file and writes it as an AnnData H5AD. | Fully deterministic | hash | +| `scanpy/sample` | Down-samples cells to a fixed count or fraction using `rng=0`. | Seeded / quasi-deterministic — seed is fixed, but sampled cell set may vary across NumPy versions. 
| hash | +| `scanpy/umap` | Computes a UMAP embedding from a pre-built neighbour graph using `random_state=0`. | Seeded / quasi-deterministic — seed is fixed, but float coordinates vary across umap-learn/numba versions. | structural — versions + schema only | ### `symphony/` -| Module | Description | Reproducibility | Test strategy (this branch) | -| ------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | -| `symphony/harmonyintegrate` | Runs Harmony batch integration via symphonypy `harmony_integrate` after log-normalisation and PCA, storing `X_pca_symphony`, `X_emb`, and `uns['symphony']`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. 
| structural — versions + schema only | +| Module | Description | Reproducibility | Test strategy (this branch) | +| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | +| `symphony/harmonyintegrate` | Runs Harmony batch integration via symphonypy after normalize_total → log1p → scale(max_value=10) → PCA(zero_center=False), storing `X_symphony`, `X_emb`, Symphony reference metadata (`var` mean/std/HVG, `varm['PCs']`, `uns['harmony']`, `uns['normalize']`), and publishing a compact `harmony_reference.h5ad`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only | +| `symphony/mapembedding` | Maps query cells onto a Symphony reference via symphonypy `map_embedding`, storing mapped coordinates in `X_symphony` and `X_emb`. | **Non-deterministic** — inherits Harmony mapping variability. 
| structural — versions + schema only | ### `scimilarity/` diff --git a/docs/usage.md b/docs/usage.md index 50ad2c88..6a387656 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -216,14 +216,16 @@ nextflow run nf-core/scdownstream --input samplesheet.csv --outdir results \ ### Reference mapping and extension **Reference mapping** means **mapping new cells into a latent space using a pre-trained model** instead of training that integration step only on the query data. -In this pipeline this can be done using **scVI**, **scANVI**, and **scimilarity**. -To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, and/or `scimilarity`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), and [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text). +In this pipeline this can be done using **scVI**, **scANVI**, **scimilarity**, and **Harmony (Symphony)**. +To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, `scimilarity`, and/or `harmony`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), and [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text). 
+ +For Harmony reference mapping, provide the compact Symphony reference AnnData from a prior de novo run (`{outdir}/combine/integrate/harmony/harmony_reference.h5ad`). It contains the gene statistics, PCA loadings, Harmony centroids, and normalization metadata required for query mapping. **Extension** is for users that have outputs of a previous run of `nf-core/scdownstream` and want to extend it with new data, without re-running the integration from scratch. -It only works if `scvi`, `scanvi` and/or `scimilarity` have been enabled in `integration_methods` in the original pipeline run. -Other integration methods than the three mentioned before are not supported for this. +It only works if `scvi`, `scanvi`, `scimilarity`, and/or `harmony` have been enabled in `integration_methods` in the original pipeline run. +Other integration methods than the four mentioned before are not supported for this. In simple terms, in this setup the workflow is: (1) project new data into the latent space learned from the data in the original run, and then (2) combine the datasets. -For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model)). +For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference)). For (2), pass the integrated `.h5ad` from the original run as [`base_adata`](https://nf-co.re/scdownstream/parameters#base_adata). Pre-trained scVI models are also shared on [scvi-hub](https://huggingface.co/scvi-tools). 
diff --git a/main.nf b/main.nf index b99f8488..7aa62964 100644 --- a/main.nf +++ b/main.nf @@ -61,6 +61,7 @@ workflow NFCORE_SCDOWNSTREAM { scvi_categorical_covariates // value: string scvi_continuous_covariates // value: string scimilarity_model // value: string + harmony_reference // value: string expimap_gmt // value: string skip_liana // value: boolean skip_rankgenesgroups // value: boolean @@ -117,6 +118,7 @@ workflow NFCORE_SCDOWNSTREAM { scvi_categorical_covariates, scvi_continuous_covariates, scimilarity_model, + harmony_reference, expimap_gmt, skip_liana, skip_rankgenesgroups, @@ -180,6 +182,10 @@ workflow { def analysis_plan = analysisPlanToList() + def harmony_reference = params.harmony_reference + ? file(params.harmony_reference, checkIfExists: true) + : null + NFCORE_SCDOWNSTREAM ( PIPELINE_INITIALISATION.out.samplesheet, ch_base_adata, @@ -211,6 +217,7 @@ workflow { params.scvi_categorical_covariates, params.scvi_continuous_covariates, params.scimilarity_model, + harmony_reference, params.expimap_gmt, params.skip_liana, params.skip_rankgenesgroups, diff --git a/modules/local/scanpy/hvgs/templates/hvgs.py b/modules/local/scanpy/hvgs/templates/hvgs.py index 70e43873..9e8bf0a1 100644 --- a/modules/local/scanpy/hvgs/templates/hvgs.py +++ b/modules/local/scanpy/hvgs/templates/hvgs.py @@ -41,6 +41,7 @@ raw_counts = adata.X.copy() + sc.pp.normalize_total(adata) sc.pp.log1p(adata) sc.pp.highly_variable_genes(adata, **kwargs) diff --git a/modules/local/scanpy/hvgs/tests/main.nf.test.snap b/modules/local/scanpy/hvgs/tests/main.nf.test.snap index 0fb740d0..e8a736f2 100644 --- a/modules/local/scanpy/hvgs/tests/main.nf.test.snap +++ b/modules/local/scanpy/hvgs/tests/main.nf.test.snap @@ -9,7 +9,7 @@ }, { "n_obs": 38234, - "n_vars": 100, + "n_vars": 101, "obs": { "index": "_index", "columns": [ @@ -26,19 +26,19 @@ ] }, "layers": [ - + ], "obsm": [ - + ], "varm": [ - + ], "obsp": [ - + ], "varp": [ - + ], "uns": [ "hvg", @@ -46,10 +46,10 @@ ] } ], - "timestamp": 
"2026-03-29T11:18:05.314404083", + "timestamp": "2026-05-28T12:02:56.794195774", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": "26.04.0" } }, "Should run without a specified number of HVGs": { @@ -62,7 +62,7 @@ }, { "n_obs": 38234, - "n_vars": 251, + "n_vars": 111, "obs": { "index": "_index", "columns": [ @@ -79,19 +79,19 @@ ] }, "layers": [ - + ], "obsm": [ - + ], "varm": [ - + ], "obsp": [ - + ], "varp": [ - + ], "uns": [ "hvg", @@ -99,10 +99,10 @@ ] } ], - "timestamp": "2026-03-29T11:17:05.806436168", + "timestamp": "2026-05-28T12:02:32.511106972", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": "26.04.0" } }, "Should run without failures - stub": { @@ -171,19 +171,19 @@ ] }, "layers": [ - + ], "obsm": [ - + ], "varm": [ - + ], "obsp": [ - + ], "varp": [ - + ], "uns": [ "hvg", @@ -197,4 +197,4 @@ "nextflow": "25.10.2" } } -} +} \ No newline at end of file diff --git a/modules/local/scanpy/pca/templates/pca.py b/modules/local/scanpy/pca/templates/pca.py index 666ab4cb..b3554d0a 100644 --- a/modules/local/scanpy/pca/templates/pca.py +++ b/modules/local/scanpy/pca/templates/pca.py @@ -21,7 +21,8 @@ prefix = "${prefix}" key_added = "${key_added}" -# Run PCA +sc.pp.normalize_total(adata) +sc.pp.log1p(adata) sc.pp.pca(adata, random_state=0, key_added=key_added) adata.write_h5ad(f"{prefix}.h5ad") diff --git a/modules/local/scanpy/pca/tests/main.nf.test.snap b/modules/local/scanpy/pca/tests/main.nf.test.snap index dd729e71..1ae72069 100644 --- a/modules/local/scanpy/pca/tests/main.nf.test.snap +++ b/modules/local/scanpy/pca/tests/main.nf.test.snap @@ -59,11 +59,11 @@ "var": { "index": "_index", "columns": [ - + ] }, "layers": [ - + ], "obsm": [ "X_pca" @@ -72,20 +72,21 @@ "X_pca" ], "obsp": [ - + ], "varp": [ - + ], "uns": [ - "X_pca" + "X_pca", + "log1p" ] } ], - "timestamp": "2026-03-29T11:17:21.253081099", + "timestamp": "2026-05-28T12:10:47.461951809", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": 
"26.04.0" } } -} +} \ No newline at end of file diff --git a/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py b/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py index 32de73b5..1afb31d4 100644 --- a/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py +++ b/modules/local/symphony/harmonyintegrate/templates/harmonyintegrate.py @@ -1,53 +1,80 @@ #!/usr/bin/env python3 -# Disable OpenMP CPU topology detection for MacOS compatibility import os + os.environ["KMP_AFFINITY"] = "disabled" +os.environ["MPLCONFIGDIR"] = "./tmp/mpl" +os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba" import importlib.metadata import platform -import yaml - -os.environ["MPLCONFIGDIR"] = "./tmp/mpl" -os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba" +import numpy as np +import pandas as pd import scanpy as sc import symphonypy as sp -import pandas as pd - +import yaml +from anndata import AnnData +from scipy.sparse import csr_matrix from threadpoolctl import threadpool_limits + + +def build_reference(adata, target_sum): + harmony = adata.uns["harmony"] + return AnnData( + X=csr_matrix((0, adata.n_vars), dtype=np.float32), + var=adata.var[["mean", "std", "highly_variable"]].copy(), + varm={"PCs": adata.varm["PCs"].copy()}, + uns={ + "harmony": { + "Nr": harmony["Nr"], + "C": harmony["C"], + "K": harmony["K"], + "sigma": harmony.get("sigma"), + "ref_basis_loadings": harmony["ref_basis_loadings"], + }, + "normalize": {"target_sum": target_sum}, + }, + ) + + threadpool_limits(int("${task.cpus}")) adata = sc.read_h5ad("${h5ad}") - +adata_proc = adata.copy() prefix = "${prefix}" +batch_col = "${batch_col}" +counts_layer = "${counts_layer}" -adata_processing = adata.copy() - -if "${counts_layer}" != "X": - adata_processing.X = adata.layers["${counts_layer}"] +if counts_layer != "X": + adata_proc.X = adata_proc.layers[counts_layer] -sc.pp.log1p(adata_processing) -sc.pp.pca(adata_processing) +target_sum = 
float(np.median(np.asarray(adata_proc.X.sum(axis=1)).ravel())) +sc.pp.normalize_total(adata_proc, target_sum=target_sum) +sc.pp.log1p(adata_proc) +sc.pp.scale(adata_proc, max_value=10) +sc.pp.pca(adata_proc, n_comps=30, zero_center=False) +if "highly_variable" not in adata_proc.var.columns: + adata_proc.var["highly_variable"] = True sp.pp.harmony_integrate( - adata_processing, - key="${batch_col}", + adata_proc, + key=batch_col, flavor="python", ref_basis_source="X_pca", - ref_basis_adjusted="X_pca_symphony", + ref_basis_adjusted="X_symphony", ) -adata.obsm["X_pca_symphony"] = adata_processing.obsm["X_pca_symphony"] -adata.obsm["X_emb"] = adata_processing.obsm["X_pca_symphony"] -adata.uns["symphony"] = adata_processing.uns["harmony"] +adata_proc.uns["symphony"] = adata_proc.uns["harmony"] +adata_proc.uns["normalize"] = {"target_sum": target_sum} -adata.write_h5ad(f"{prefix}.h5ad") +build_reference(adata_proc, target_sum).write_h5ad(f"{prefix}_reference.h5ad") -df = pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names) -df.to_pickle(f"X_{prefix}.pkl") +adata.obsm["X_symphony"] = adata_proc.obsm["X_symphony"] +adata.obsm["X_emb"] = adata_proc.obsm["X_symphony"] +adata.write_h5ad(f"{prefix}.h5ad") -# Versions +pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names).to_pickle(f"X_{prefix}.pkl") versions = { "${task.process}": { diff --git a/modules/local/symphony/harmonyintegrate/tests/main.nf.test b/modules/local/symphony/harmonyintegrate/tests/main.nf.test index 52dc354e..47825dc4 100644 --- a/modules/local/symphony/harmonyintegrate/tests/main.nf.test +++ b/modules/local/symphony/harmonyintegrate/tests/main.nf.test @@ -28,13 +28,20 @@ nextflow_process { then { def adata = anndata(process.out.h5ad[0][1]) + def reference = anndata(process.out.reference[0][1]) assert process.success assert "X_emb" in adata.obsm - assert "X_pca_symphony" in adata.obsm - assert "symphony" in adata.uns + assert "harmony" in reference.uns + assert "normalize" in reference.uns + assert 
"mean" in reference.var.colnames + assert "std" in reference.var.colnames + assert "highly_variable" in reference.var.colnames + assert "PCs" in reference.varm + assert reference.n_obs == 0 assert snapshot( path(process.out.versions[0]).yaml, - adata.yaml + adata.yaml, + reference.yaml ).match() } diff --git a/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap b/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap index 6b03b90a..06b31468 100644 --- a/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap +++ b/modules/local/symphony/harmonyintegrate/tests/main.nf.test.snap @@ -11,9 +11,17 @@ ] ], "1": [ - "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + [ + { + "id": "test" + }, + "test_reference.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] ], "2": [ + "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "3": [ "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ], "h5ad": [ @@ -27,12 +35,20 @@ "obsm": [ "X_test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], + "reference": [ + [ + { + "id": "test" + }, + "test_reference.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "versions": [ "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": "2026-05-19T10:59:10.495670438", + "timestamp": "2026-05-28T13:28:41.939214142", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" @@ -68,7 +84,7 @@ ], "obsm": [ "X_emb", - "X_pca_symphony" + "X_symphony" ], "varm": [ @@ -80,14 +96,51 @@ ], "uns": [ - "symphony" + + ] + }, + { + "n_obs": 0, + "n_vars": 9887, + "obs": { + "index": "_index", + "columns": [ + + ] + }, + "var": { + "index": "_index", + "columns": [ + "highly_variable", + "mean", + "std" + ] + }, + "layers": [ + + ], + "obsm": [ + + ], + "varm": [ + "PCs" + ], + "obsp": [ + + ], + "varp": [ + + ], + "uns": [ + "harmony", + "normalize" ] } ], - "timestamp": "2026-05-26T17:36:49.568823", + "timestamp": "2026-05-28T14:41:42.365043934", "meta": { - "nf-test": "0.9.5", - "nextflow": "26.04.2" + "nf-test": 
"0.9.4", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/modules/local/symphony/mapembedding/environment.yml b/modules/local/symphony/mapembedding/environment.yml new file mode 100644 index 00000000..1e4070eb --- /dev/null +++ b/modules/local/symphony/mapembedding/environment.yml @@ -0,0 +1,10 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::python=3.13.13 + - conda-forge::pyyaml=6.0.3 + - conda-forge::scanpy=1.12.1 + - pip + - pip: + - symphonypy==0.2.4 diff --git a/modules/local/symphony/mapembedding/main.nf b/modules/local/symphony/mapembedding/main.nf new file mode 100644 index 00000000..36bd70ce --- /dev/null +++ b/modules/local/symphony/mapembedding/main.nf @@ -0,0 +1,35 @@ +process SYMPHONY_MAPEMBEDDING { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/51/512121548a21b4d1bb8acfd5e30a75c5c2103ddd00cf1de4713c682b7e6b5387/data' + : 'community.wave.seqera.io/library/python_pyyaml_scanpy_pip_symphonypy:2198c27c5c9392d5'}" + + input: + tuple val(meta), path(h5ad) + tuple val(meta2), path(reference_h5ad, stageAs: 'reference/reference.h5ad') + val(batch_col) + val(counts_layer) + + output: + tuple val(meta), path("${prefix}.h5ad"), emit: h5ad + path "X_${prefix}.pkl" , emit: obsm + path "versions.yml" , emit: versions, topic: versions + + script: + prefix = task.ext.prefix ?: "${meta.id}" + if ("${prefix}.h5ad" == "${h5ad}") { + error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ } + template('map_embedding.py') + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.h5ad + touch X_${prefix}.pkl + touch versions.yml + """ +} diff --git a/modules/local/symphony/mapembedding/templates/map_embedding.py b/modules/local/symphony/mapembedding/templates/map_embedding.py new file mode 100644 index 00000000..4ade854e --- /dev/null +++ b/modules/local/symphony/mapembedding/templates/map_embedding.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 + +import os + +os.environ["KMP_AFFINITY"] = "disabled" +os.environ["MPLCONFIGDIR"] = "./tmp/mpl" +os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba" + +import importlib.metadata +import platform + +import pandas as pd +import scanpy as sc +import symphonypy as sp +import yaml +from threadpoolctl import threadpool_limits + + +threadpool_limits(int("${task.cpus}")) + +adata = sc.read_h5ad("${h5ad}") +adata_proc = adata.copy() +adata_ref = sc.read_h5ad("reference/reference.h5ad") +prefix = "${prefix}" +batch_col = "${batch_col}" +counts_layer = "${counts_layer}" + +if counts_layer != "X": + adata_proc.X = adata_proc.layers[counts_layer] + +target_sum = float(adata_ref.uns["normalize"]["target_sum"]) +sc.pp.normalize_total(adata_proc, target_sum=target_sum) +sc.pp.log1p(adata_proc) + +sp.tl.map_embedding( + adata_proc, + adata_ref, + key=batch_col, + transferred_adjusted_basis="X_symphony", + use_genes_column="highly_variable", +) + +adata.obsm["X_symphony"] = adata_proc.obsm["X_symphony"] +adata.obsm["X_emb"] = adata_proc.obsm["X_symphony"] + +adata.write_h5ad(f"{prefix}.h5ad") +pd.DataFrame(adata.obsm["X_emb"], index=adata.obs_names).to_pickle(f"X_{prefix}.pkl") + +versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": importlib.metadata.version("scanpy"), + "symphonypy": importlib.metadata.version("symphonypy"), + "pandas": pd.__version__, + } +} + +with open("versions.yml", "w") as f: + yaml.dump(versions, f) diff --git 
a/modules/local/symphony/mapembedding/tests/main.nf.test b/modules/local/symphony/mapembedding/tests/main.nf.test new file mode 100644 index 00000000..e51fe09e --- /dev/null +++ b/modules/local/symphony/mapembedding/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_process { + + name "Test Process SYMPHONY_MAPEMBEDDING" + script "modules/local/symphony/mapembedding/main.nf" + process "SYMPHONY_MAPEMBEDDING" + + tag "modules" + tag "modules_local" + + setup { + run("SYMPHONY_HARMONYINTEGRATE") { + script "modules/local/symphony/harmonyintegrate/main.nf" + process { + """ + input[0] = channel.of([ + [ id: 'harmony' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) + ] + ) + input[1] = "sample" + input[2] = "X" + """ + } + } + } + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = channel.of([ + [ id: 'harmony' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) + ] + ) + input[1] = SYMPHONY_HARMONYINTEGRATE.out.reference + input[2] = "sample" + input[3] = "X" + """ + } + } + + then { + def adata = anndata(process.out.h5ad[0][1]) + assert process.success + assert "X_emb" in adata.obsm + assert "X_symphony" in adata.obsm + assert snapshot( + path(process.out.versions[0]).yaml, + adata.yaml + ).match() + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = channel.of([ + [ id: 'harmony' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) + ] + ) + input[1] = channel.of([ + [ id: 'harmony' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) + ] + ) + input[2] = "sample" + 
input[3] = "X" + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/local/symphony/mapembedding/tests/main.nf.test.snap b/modules/local/symphony/mapembedding/tests/main.nf.test.snap new file mode 100644 index 00000000..f46df979 --- /dev/null +++ b/modules/local/symphony/mapembedding/tests/main.nf.test.snap @@ -0,0 +1,93 @@ +{ + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + { + "id": "harmony" + }, + "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "h5ad": [ + [ + { + "id": "harmony" + }, + "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "obsm": [ + "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + } + ], + "timestamp": "2026-05-28T08:08:04.937392964", + "meta": { + "nf-test": "0.9.4", + "nextflow": "26.04.0" + } + }, + "Should run without failures": { + "content": [ + { + "SYMPHONY_MAPEMBEDDING": { + "pandas": "2.3.3", + "python": "3.13.13", + "scanpy": "1.12.1", + "symphonypy": "0.2.4" + } + }, + { + "n_obs": 38234, + "n_vars": 9887, + "obs": { + "index": "_index", + "columns": [ + "sample" + ] + }, + "var": { + "index": "_index", + "columns": [ + + ] + }, + "layers": [ + + ], + "obsm": [ + "X_emb", + "X_symphony" + ], + "varm": [ + + ], + "obsp": [ + + ], + "varp": [ + + ], + "uns": [ + + ] + } + ], + "timestamp": "2026-05-28T13:56:04.774526372", + "meta": { + "nf-test": "0.9.4", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 1774ba60..f2d6c135 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,7 @@ params { scvi_model = null scanvi_model = null scimilarity_model = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz' + 
harmony_reference = null expimap_gmt = null // Extension options diff --git a/nextflow_schema.json b/nextflow_schema.json index be1d18d2..43b1d63f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -167,6 +167,14 @@ "description": "Optional file containing a list of gene symbols (one per line). If provided, these genes will be excluded from highly variable genes selection for integration.", "exists": true }, + "harmony_reference": { + "type": "string", + "format": "file-path", + "description": "Path to a Symphony reference AnnData, only relevant if Harmony is selected in `integration_methods`. If provided, query cells will be mapped onto this reference instead of running de novo Harmony integration.", + "help_text": "The file should be in the .h5ad format. It is produced by a prior de novo Harmony run as `{outdir}/combine/integrate/harmony/harmony_reference.h5ad` and contains the compact Symphony reference metadata required for query mapping. Required for Harmony reference mapping and when extending an atlas with `--base_adata`.", + "pattern": "^\\S+\\.h5ad$", + "exists": true + }, "scvi_model": { "type": "string", "format": "file-path", diff --git a/subworkflows/local/combine/main.nf b/subworkflows/local/combine/main.nf index 1c150fb2..80ec76c1 100644 --- a/subworkflows/local/combine/main.nf +++ b/subworkflows/local/combine/main.nf @@ -17,6 +17,7 @@ workflow COMBINE { scvi_categorical_covariates // value: string scvi_continuous_covariates // value: string scimilarity_model // value: string + harmony_reference // value: string expimap_gmt // value: string condition_col // value: string scib // value: boolean @@ -52,6 +53,7 @@ workflow COMBINE { scvi_categorical_covariates, scvi_continuous_covariates, scimilarity_model, + harmony_reference, expimap_gmt, condition_col ) diff --git a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test index 0a59e5fb..e01cd89a 100644 --- 
a/subworkflows/local/integrate/tests/main.nf.test +++ b/subworkflows/local/integrate/tests/main.nf.test @@ -31,7 +31,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -65,7 +66,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -74,8 +76,7 @@ nextflow_workflow { def adata = anndata(workflow.out.integrations[0][1]) assert workflow.success assert "X_emb" in adata.obsm - assert "X_pca_symphony" in adata.obsm - assert "symphony" in adata.uns + assert "X_symphony" in adata.obsm assert snapshot( workflow.out.versions, adata.yaml @@ -108,7 +109,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -142,7 +144,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -187,7 +190,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -221,7 +225,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -262,7 +267,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -298,7 +304,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -332,7 +339,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'condition' + input[11] = null + input[12] = 'condition' """ } } @@ -373,7 +381,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 
'batch' + input[11] = null + input[12] = 'batch' """ } } @@ -407,7 +416,8 @@ nextflow_workflow { input[8] = [] input[9] = null input[10] = null - input[11] = 'batch' + input[11] = null + input[12] = 'batch' """ } } diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf index 6d25a1e1..5f4ba6eb 100644 --- a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf @@ -181,8 +181,8 @@ def validateInputParameters() { } def integration_methods = params.integration_methods.split(',').collect { it -> it.trim().toLowerCase() } - if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity']).size() > 0) { - throw new Exception("Only scvi, scanvi and scimilarity integration methods are supported if base_adata is provided") + if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity', 'harmony']).size() > 0) { + throw new Exception("Only scvi, scanvi, scimilarity and harmony integration methods are supported if base_adata is provided") } if (params.base_adata && 'scvi' in integration_methods && !params.scvi_model) { @@ -197,6 +197,10 @@ def validateInputParameters() { throw new Exception("If base_adata is provided and scimilarity is used as integration method, scimilarity_model must be provided.") } + if (params.base_adata && 'harmony' in integration_methods && !params.harmony_reference) { + throw new Exception("If base_adata is provided and harmony is used as integration method, harmony_reference must be provided.") + } + // Validate sample_n and sample_fraction parameters if (params.sample_n && params.sample_fraction) { throw new Exception("Both sample_n and sample_fraction are set. 
Please use only one of them.") diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test index 083adbb7..365d4b30 100644 --- a/tests/main_pipeline_extend.nf.test +++ b/tests/main_pipeline_extend.nf.test @@ -4,17 +4,18 @@ nextflow_pipeline { script "main.nf" tag "pipeline" - test("Should perform scvi reference extension") { + test("Should perform scvi and harmony reference extension") { when { params { input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv' - integration_methods = 'scvi' + integration_methods = 'scvi,harmony' doublet_detection = 'scrublet,scdblfinder' celltypist_model = 'Adult_COVID19_PBMC' integration_hvgs = 500 outdir = "$outputDir" scvi_model = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt' + harmony_reference = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad' base_adata = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/merged.h5ad' } } diff --git a/tests/main_pipeline_extend.nf.test.snap b/tests/main_pipeline_extend.nf.test.snap index 63c6d111..add7d274 100644 --- a/tests/main_pipeline_extend.nf.test.snap +++ b/tests/main_pipeline_extend.nf.test.snap @@ -1,5 +1,5 @@ { - "Should perform scvi reference extension": { + "Should perform scvi and harmony reference extension": { "content": [ { "ADATA_EXTEND": { @@ -144,6 +144,12 @@ "SCVITOOLS_SCVI": { "scvi": "1.4.3" }, + "SYMPHONY_MAPEMBEDDING": { + "pandas": "2.3.3", + "python": "3.13.13", + "scanpy": "1.12.1", + "symphonypy": "0.2.4" + }, "UMAP": { "pandas": "2.3.3", "python": "3.13.12", @@ -175,6 +181,14 @@ "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf", "celltypes/singler/SRR28679759_singler_predictions.csv", "cluster_dimred", + "cluster_dimred/harmony", + "cluster_dimred/harmony/entropy", + "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png", + 
"cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png", + "cluster_dimred/harmony/leiden", + "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png", + "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png", + "cluster_dimred/harmony/umap", "cluster_dimred/scvi", "cluster_dimred/scvi/entropy", "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png", @@ -186,6 +200,7 @@ "combine", "combine/integrate", "combine/integrate/scib_metrics", + "combine/integrate/scib_metrics/harmony_metrics.tsv", "combine/integrate/scib_metrics/scvi_metrics.tsv", "combine/integrate/scvi", "combine/integrate/scvi/scvi_model", @@ -228,10 +243,10 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-11T23:24:27.061703025", + "timestamp": "2026-05-28T11:14:08.38204652", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" } } -} \ No newline at end of file +} diff --git a/tests/main_pipeline_reference_mapping.nf.test b/tests/main_pipeline_reference_mapping.nf.test index 9292b3b7..93d1b10f 100644 --- a/tests/main_pipeline_reference_mapping.nf.test +++ b/tests/main_pipeline_reference_mapping.nf.test @@ -4,17 +4,18 @@ nextflow_pipeline { script "main.nf" tag "pipeline" - test("Should perform scvi reference mapping") { + test("Should perform scvi and harmony reference mapping") { when { params { input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv' - integration_methods = 'scvi' + integration_methods = 'scvi,harmony' doublet_detection = 'scrublet,scdblfinder' celltypist_model = 'Adult_COVID19_PBMC' integration_hvgs = 500 outdir = "$outputDir" scvi_model = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt' + harmony_reference = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad' } } diff --git a/tests/main_pipeline_reference_mapping.nf.test.snap b/tests/main_pipeline_reference_mapping.nf.test.snap 
index 5812c99c..8353f4a6 100644 --- a/tests/main_pipeline_reference_mapping.nf.test.snap +++ b/tests/main_pipeline_reference_mapping.nf.test.snap @@ -1,5 +1,5 @@ { - "Should perform scvi reference mapping": { + "Should perform scvi and harmony reference mapping": { "content": [ { "ADATA_EXTEND": { @@ -144,6 +144,12 @@ "SCVITOOLS_SCVI": { "scvi": "1.4.3" }, + "SYMPHONY_MAPEMBEDDING": { + "pandas": "2.3.3", + "python": "3.13.13", + "scanpy": "1.12.1", + "symphonypy": "0.2.4" + }, "UMAP": { "pandas": "2.3.3", "python": "3.13.12", @@ -175,6 +181,14 @@ "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf", "celltypes/singler/SRR28679759_singler_predictions.csv", "cluster_dimred", + "cluster_dimred/harmony", + "cluster_dimred/harmony/entropy", + "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png", + "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png", + "cluster_dimred/harmony/leiden", + "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png", + "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png", + "cluster_dimred/harmony/umap", "cluster_dimred/scvi", "cluster_dimred/scvi/entropy", "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png", @@ -186,6 +200,7 @@ "combine", "combine/integrate", "combine/integrate/scib_metrics", + "combine/integrate/scib_metrics/harmony_metrics.tsv", "combine/integrate/scib_metrics/scvi_metrics.tsv", "combine/integrate/scvi", "combine/integrate/scvi/scvi_model", @@ -228,10 +243,10 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-12T08:53:40.785735333", + "timestamp": "2026-05-28T10:56:43.387329548", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" } } -} \ No newline at end of file +} diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf index 82579603..2528f2e7 100644 --- a/workflows/scdownstream.nf +++ b/workflows/scdownstream.nf @@ -59,6 +59,7 @@ workflow SCDOWNSTREAM { scvi_categorical_covariates // value: string 
scvi_continuous_covariates // value: string scimilarity_model // value: string + harmony_reference // value: string expimap_gmt // value: string skip_liana // value: boolean skip_rankgenesgroups // value: boolean @@ -178,6 +179,7 @@ workflow SCDOWNSTREAM { scvi_categorical_covariates, scvi_continuous_covariates, scimilarity_model, + harmony_reference, expimap_gmt, condition_col, scib, From 18e6d6579f3ba8c2205a5c7fbaa6fcb5ac21e36c Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Fri, 29 May 2026 19:20:37 +0200 Subject: [PATCH 06/19] Remove local contrib test-datasets helper after upstream merge. The extension_base artifacts now live in nf-core/test-datasets, so the temporary build and collection scripts are no longer needed in this repo. --- contrib/nf-core-test-datasets/.gitattributes | 1 - contrib/nf-core-test-datasets/README.md | 49 ------------------- .../nf-core-test-datasets/build.params.json | 8 --- .../collect-artifacts.sh | 15 ------ 4 files changed, 73 deletions(-) delete mode 100644 contrib/nf-core-test-datasets/.gitattributes delete mode 100644 contrib/nf-core-test-datasets/README.md delete mode 100644 contrib/nf-core-test-datasets/build.params.json delete mode 100755 contrib/nf-core-test-datasets/collect-artifacts.sh diff --git a/contrib/nf-core-test-datasets/.gitattributes b/contrib/nf-core-test-datasets/.gitattributes deleted file mode 100644 index 914d733f..00000000 --- a/contrib/nf-core-test-datasets/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text diff --git a/contrib/nf-core-test-datasets/README.md b/contrib/nf-core-test-datasets/README.md deleted file mode 100644 index b9b58a05..00000000 --- a/contrib/nf-core-test-datasets/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# nf-core/test-datasets update — `scdownstream/extension_base` - -Copy the contents of `extension_base/` into the **`scdownstream` branch** of [nf-core/test-datasets](https://github.com/nf-core/test-datasets), replacing the existing 
files in `scdownstream/extension_base/`. - -## Files - -| File | Purpose | -| --------------------------------------- | ------------------------------------------------------------ | -| `extension_base/model.pt` | scVI checkpoint for reference mapping / extension | -| `extension_base/merged.h5ad` | Finalized atlas (`base_adata`) for extension | -| `extension_base/harmony_reference.h5ad` | Symphony reference for Harmony reference mapping / extension | - -All three must come from the **same pipeline run** (see below). - -> **Note:** If `extension_base/` already contains files but you have not run `collect-artifacts.sh` yet, `merged.h5ad` and `model.pt` may still be the current test-datasets versions and `harmony_reference.h5ad` from a Harmony-only build. **Do not open the test-datasets PR until you have run a unified build and `collect-artifacts.sh`** — all three files must be replaced together. - -## How these were generated - -```bash -# From the scdownstream repo root, with nf-core conda env active: -nextflow run main.nf -profile test,apptainer -params-file contrib/nf-core-test-datasets/build.params.json - -# Populate extension_base/ from the build output: -./contrib/nf-core-test-datasets/collect-artifacts.sh -``` - -Build parameters match the consolidated pipeline tests (`main_pipeline_reference_mapping.nf.test`, `main_pipeline_extend.nf.test`): - -- Input: `samplesheet.csv` (full atlas) -- Integration: `scvi,harmony` -- HVGs: 500 - -## PR checklist (test-datasets repo) - -1. Check out branch `scdownstream`. -2. Ensure Git LFS is enabled (`git lfs install`). -3. Copy `extension_base/*` into `scdownstream/extension_base/` (overwrite `model.pt` and `merged.h5ad`, add `harmony_reference.h5ad`). -4. Add or extend `.gitattributes` on the test-datasets repo: - - ``` - scdownstream/extension_base/*.h5ad filter=lfs diff=lfs merge=lfs -text - ``` - -5. Commit and open PR against `scdownstream`. -6. 
After merge, re-run `nftu` on the scdownstream pipeline reference-mapping and extend tests. - -## Pipeline version - -Record the scdownstream commit/tag used when generating these files in your PR description. diff --git a/contrib/nf-core-test-datasets/build.params.json b/contrib/nf-core-test-datasets/build.params.json deleted file mode 100644 index 8faa4a16..00000000 --- a/contrib/nf-core-test-datasets/build.params.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "input": "https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet.csv", - "integration_methods": "scvi,harmony", - "integration_hvgs": 500, - "doublet_detection": "scrublet,scdblfinder", - "celltypist_model": "Adult_COVID19_PBMC", - "outdir": "contrib/nf-core-test-datasets/build_output" -} diff --git a/contrib/nf-core-test-datasets/collect-artifacts.sh b/contrib/nf-core-test-datasets/collect-artifacts.sh deleted file mode 100755 index 75fa3d4e..00000000 --- a/contrib/nf-core-test-datasets/collect-artifacts.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -OUT="${ROOT}/contrib/nf-core-test-datasets/build_output" -DEST="${ROOT}/contrib/nf-core-test-datasets/extension_base" - -mkdir -p "${DEST}" - -cp "${OUT}/combine/integrate/scvi/scvi_model/model.pt" "${DEST}/model.pt" -cp "${OUT}/finalized/merged.h5ad" "${DEST}/merged.h5ad" -cp "${OUT}/combine/integrate/harmony/harmony_reference.h5ad" "${DEST}/harmony_reference.h5ad" - -echo "Collected artifacts into ${DEST}:" -ls -lh "${DEST}" From 00b6c170fc30f2f776f61a8fefd57960c790d784 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Fri, 29 May 2026 19:23:00 +0200 Subject: [PATCH 07/19] Rename harmony integration method to symphony. Users now select symphony in integration_methods and pass symphony_reference for query mapping, with outputs published under combine/integrate/symphony. 
--- README.md | 2 +- assets/multiqc_config.yml | 2 +- conf/modules.config | 6 +++--- conf/test.config | 2 +- conf/test_full.config | 2 +- docs/output.md | 4 ++-- docs/reproducibility.md | 12 ++++++------ docs/usage.md | 14 +++++++------- main.nf | 10 +++++----- .../adata/prepcellxgene/templates/prepcellxgene.py | 2 +- .../local/symphony/mapembedding/tests/main.nf.test | 8 ++++---- nextflow.config | 2 +- nextflow_schema.json | 14 +++++++------- subworkflows/local/cluster/tests/main.nf.test | 2 +- subworkflows/local/combine/main.nf | 4 ++-- subworkflows/local/integrate/main.nf | 12 ++++++------ subworkflows/local/integrate/tests/main.nf.test | 10 +++++----- .../utils_nfcore_scdownstream_pipeline/main.nf | 8 ++++---- tests/main_pipeline_build.nf.test | 2 +- workflows/scdownstream.nf | 4 ++-- 20 files changed, 61 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index 8433a1fd..d45ee4ae 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ Steps marked with the boat icon are not yet implemented. For the other steps, th 3. 
Integration - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html) - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html) - - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/)) + - [Symphony](https://github.com/immunogenomics/symphony) / Harmony (via [symphonypy](https://pypi.org/project/symphonypy/)) - [BBKNN](https://github.com/Teichlab/bbknn) - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html) - [Seurat](https://satijalab.org/seurat/articles/integration_introduction) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 945264e4..976136b4 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -18,7 +18,7 @@ report_section_order: order: -1005 "scanvi": order: -1006 - "harmony": + "symphony": order: -1007 "bbknn": order: -1009 diff --git a/conf/modules.config b/conf/modules.config index da4a96cb..4260f400 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -381,11 +381,11 @@ process { withName: SYMPHONY_HARMONYINTEGRATE { publishDir = [ - path: { "${params.outdir}/combine/integrate/harmony" }, + path: { "${params.outdir}/combine/integrate/symphony" }, mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith('_reference.h5ad')) { - return 'harmony_reference.h5ad' + return 'symphony_reference.h5ad' } if (params.save_intermediates && !filename.equals('versions.yml')) { return filename @@ -397,7 +397,7 @@ process { withName: SYMPHONY_MAPEMBEDDING { publishDir = [ - path: { "${params.outdir}/combine/integrate/harmony" }, + path: { "${params.outdir}/combine/integrate/symphony" }, mode: params.publish_dir_mode, enabled: params.save_intermediates, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, diff --git a/conf/test.config b/conf/test.config index 31b5a05b..fc2ae4f0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,7 +24,7 @@ params { // Input data input = params.pipelines_testdata_base_path + 'samplesheet.csv' - integration_methods = 'scvi,harmony,bbknn,combat' + integration_methods = 'scvi,symphony,bbknn,combat' doublet_detection = 'solo,scrublet,scdblfinder' celltypist_model = 'Adult_Human_Skin' celldex_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/singleR/references.csv' diff --git a/conf/test_full.config b/conf/test_full.config index f4629109..96e64421 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -24,7 +24,7 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + 'samplesheet.csv' - integration_methods = 'scvi,harmony,bbknn,combat' + integration_methods = 'scvi,symphony,bbknn,combat' doublet_detection = 'solo,scrublet,doubletdetection,scdblfinder' celltypist_model = 'Adult_Human_Skin' celldex_reference = 'https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/singleR/references.csv' diff --git a/docs/output.md b/docs/output.md index 38d7356b..97403fec 100644 --- a/docs/output.md +++ b/docs/output.md @@ -34,7 +34,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d 3. 
Integration - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html) - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html) - - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html) (via [symphonypy](https://pypi.org/project/symphonypy/)) + - [Symphony](https://github.com/immunogenomics/symphony) / Harmony (via [symphonypy](https://pypi.org/project/symphonypy/)) - [BBKNN](https://github.com/Teichlab/bbknn) - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html) - [Seurat](https://satijalab.org/seurat/articles/integration_introduction) @@ -98,7 +98,7 @@ The `preprocess` directory contains a subdirectory for each sample, which contai - `${tool}` - `*.h5ad/*.rds`: The integrated H5AD or RDS file. - `X_${tool}.pkl`: Low-dimensional representation of the integrated data. - - `harmony_reference.h5ad` (Harmony only): Compact Symphony reference AnnData for query mapping, published from de novo Harmony runs. + - `symphony_reference.h5ad` (Symphony only): Compact Symphony reference AnnData for query mapping, published from de novo Symphony runs. 
diff --git a/docs/reproducibility.md b/docs/reproducibility.md index 78769582..f93ff464 100644 --- a/docs/reproducibility.md +++ b/docs/reproducibility.md @@ -139,10 +139,10 @@ The **Test strategy (this branch)** column describes what the tests on this bran ### `symphony/` -| Module | Description | Reproducibility | Test strategy (this branch) | -| --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | -| `symphony/harmonyintegrate` | Runs Harmony batch integration via symphonypy after normalize_total → log1p → scale(max_value=10) → PCA(zero_center=False), storing `X_symphony`, `X_emb`, Symphony reference metadata (`var` mean/std/HVG, `varm['PCs']`, `uns['harmony']`, `uns['normalize']`), and publishing a compact `harmony_reference.h5ad`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only | -| `symphony/mapembedding` | Maps query cells onto a Symphony reference via symphonypy `map_embedding`, storing mapped coordinates in `X_symphony` and `X_emb`. | **Non-deterministic** — inherits Harmony mapping variability. 
| structural — versions + schema only | +| Module | Description | Reproducibility | Test strategy (this branch) | +| --------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- | ----------------------------------- | +| `symphony/harmonyintegrate` | Runs Symphony integration via symphonypy/Harmony after normalize_total → log1p → scale(max_value=10) → PCA(zero_center=False), storing `X_symphony`, `X_emb`, Symphony reference metadata (`var` mean/std/HVG, `varm['PCs']`, `uns['harmony']`, `uns['normalize']`), and publishing a compact `symphony_reference.h5ad`. Requires symphonypy ≥0.2.3 ([symphonypy#8](https://github.com/potulabe/symphonypy/issues/8), [symphonypy#9](https://github.com/potulabe/symphonypy/issues/9)). | **Non-deterministic** — Harmony is an iterative optimisation; symphonypy passes `random_seed=1` but upstream PCA is unseeded. | structural — versions + schema only | +| `symphony/mapembedding` | Maps query cells onto a Symphony reference via symphonypy `map_embedding`, storing mapped coordinates in `X_symphony` and `X_emb`. | **Non-deterministic** — inherits Symphony/Harmony mapping variability. | structural — versions + schema only | ### `scimilarity/` @@ -186,11 +186,11 @@ The **Test strategy (this branch)** column describes what the tests on this bran | `ambient_correction` | Dispatches ambient RNA correction to decontX, SoupX, or none based on a parameter. 
| **Non-deterministic** for decontX (no seed) and SoupX (seeded clustering but variable results); fully deterministic for the `none` passthrough. | **Scenario-dependent:** often `versions` as YAML + `adata.yaml` when an H5AD is produced; **`none` / meta-disabled** paths may snapshot **only `versions` + `workflow.out.h5ad.size()`** (counts, not hashes). | | `celltype_assignment` | Orchestrates cell type annotation by running SingleR and/or CellTypist. | Fully deterministic at inference time for both methods. | **`workflow.out.versions` + `workflow.out.obs.size()`** for non-stub tests; separate **stub** test exercises subworkflow wiring. | | `cluster` | Full clustering pipeline: neighbours → UMAP → Leiden at multiple resolutions → Shannon entropy. | Seeded / quasi-deterministic for UMAP; **non-deterministic** due to unseeded Leiden. | structural — **`workflow.out.versions` only** (each as YAML); graph / embedding presence asserted in code outside `snapshot`. | -| `combine` | Merges all samples and runs all configured integration methods. | Inherits from constituent modules — ranges from fully deterministic (no integration) to seeded/quasi-deterministic (scVI, Harmony, Seurat). | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on merged H5AD. | +| `combine` | Merges all samples and runs all configured integration methods. | Inherits from constituent modules — ranges from fully deterministic (no integration) to seeded/quasi-deterministic (scVI, Symphony, Seurat). | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on merged H5AD. | | `differential_expression` | Runs rank-genes-groups DE analysis across all combinations of clustering labels, conditions, and cell-type subsets. | Fully deterministic for the default wilcoxon/t-test methods. | structural — **`workflow.out.versions` only** (YAML); DE / MultiQC presence asserted outside `snapshot` where needed. 
| | `doublet_detection` | Runs one or more doublet-detection methods (scdblfinder, solo, scrublet, doubletdetection) and removes called doublets. | **Non-deterministic** — solo, scrublet, and doubletdetection have stochastic components; scdblfinder is seeded. | structural + **range assertion** on **`n_obs`**; snapshot uses **`versions` (YAML) + `adata.yaml`**. | | `finalize` | Assembles the final AnnData by extending it with all collected obs/obsm/uns/layers outputs. | Fully deterministic | hash — **`workflow.out.h5ad` + `workflow.out.versions` (YAML) + `adata.yaml`** — not a bare `snapshot(workflow.out)` in non-stub tests. | -| `integrate` | Applies HVG selection then one or more integration methods (scVI, scANVI, Harmony, BBKNN, ComBat, Seurat, SCimilarity, PCA, EXPIMAP). | Seeded / quasi-deterministic for scVI/scANVI/ComBat/Seurat/BBKNN/PCA; **non-deterministic** for Harmony and EXPIMAP (iterative training). | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on integration H5AD (e.g. Harmony / BBKNN / ComBat / PCA tests). | +| `integrate` | Applies HVG selection then one or more integration methods (scVI, scANVI, Symphony, BBKNN, ComBat, Seurat, SCimilarity, PCA, EXPIMAP). | Seeded / quasi-deterministic for scVI/scANVI/ComBat/Seurat/BBKNN/PCA; **non-deterministic** for Symphony and EXPIMAP (iterative training). | structural — **`workflow.out.versions` (YAML) + `adata.yaml`** on integration H5AD (e.g. Symphony / BBKNN / ComBat / PCA tests). | | `load_h5ad` | Loads input files in H5AD, 10x H5, RDS, or CSV format and converts all to AnnData H5AD. | Fully deterministic | hash — **`snapshot(workflow.out)` only** (passthrough-safe; avoids `anndata().yaml` on unstaged inputs per nf-test rules). | | `per_group` | Runs PAGA, LIANA rank-aggregate, rank-genes DE, and optional CyteType per cluster grouping. | **Seeded / quasi-deterministic** — inherits from constituent modules; CyteType is non-deterministic when enabled. 
| structural — **`workflow.out.versions` only** (YAML); optional `workflow.out.obs.size()` when CyteType is enabled. | | `pseudobulking` | Aggregates single-cell data into pseudobulk profiles grouped by specified metadata columns. | Fully deterministic | hash — **`workflow.out` + `versions` (YAML) + `adata.yaml`** on pseudobulk H5AD. | diff --git a/docs/usage.md b/docs/usage.md index 6a387656..a7dcc04a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -216,16 +216,16 @@ nextflow run nf-core/scdownstream --input samplesheet.csv --outdir results \ ### Reference mapping and extension **Reference mapping** means **mapping new cells into a latent space using a pre-trained model** instead of training that integration step only on the query data. -In this pipeline this can be done using **scVI**, **scANVI**, **scimilarity**, and **Harmony (Symphony)**. -To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, `scimilarity`, and/or `harmony`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), and [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text). +In this pipeline this can be done using **scVI**, **scANVI**, **scimilarity**, and **Symphony**. 
+To enable it, add the corresponding method to [`integration_methods`](https://nf-co.re/scdownstream/parameters#integration_methods) (`scvi`, `scanvi`, `scimilarity`, and/or `symphony`) and set the matching model parameters for each method you use: [`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), and [`symphony_reference`](https://nf-co.re/scdownstream/parameters#symphony_reference) (see the [parameter reference](https://nf-co.re/scdownstream/parameters) for file types, defaults, and help text). -For Harmony reference mapping, provide the compact Symphony reference AnnData from a prior de novo run (`{outdir}/combine/integrate/harmony/harmony_reference.h5ad`). It contains the gene statistics, PCA loadings, Harmony centroids, and normalization metadata required for query mapping. +For Symphony reference mapping, provide the compact Symphony reference AnnData from a prior de novo run (`{outdir}/combine/integrate/symphony/symphony_reference.h5ad`). It contains the gene statistics, PCA loadings, Harmony centroids, and normalization metadata required for query mapping. **Extension** is for users that have outputs of a previous run of `nf-core/scdownstream` and want to extend it with new data, without re-running the integration from scratch. -It only works if `scvi`, `scanvi`, `scimilarity`, and/or `harmony` have been enabled in `integration_methods` in the original pipeline run. +It only works if `scvi`, `scanvi`, `scimilarity`, and/or `symphony` have been enabled in `integration_methods` in the original pipeline run. Other integration methods than the four mentioned before are not supported for this. In simple terms, in this setup the workflow is: (1) project new data into the latent space learned from the data in the original run, and then (2) combine the datasets. 
-For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), [`harmony_reference`](https://nf-co.re/scdownstream/parameters#harmony_reference)). +For (1), provide the same checkpoints as for reference mapping ([`scvi_model`](https://nf-co.re/scdownstream/parameters#scvi_model), [`scanvi_model`](https://nf-co.re/scdownstream/parameters#scanvi_model), [`scimilarity_model`](https://nf-co.re/scdownstream/parameters#scimilarity_model), [`symphony_reference`](https://nf-co.re/scdownstream/parameters#symphony_reference)). For (2), pass the integrated `.h5ad` from the original run as [`base_adata`](https://nf-co.re/scdownstream/parameters#base_adata). Pre-trained scVI models are also shared on [scvi-hub](https://huggingface.co/scvi-tools). @@ -283,11 +283,11 @@ Each row in the CSV selects a subset of clusterings. **All columns are optional* When multiple rows match a clustering result, their `analyses` lists are **combined** (duplicates removed). If any matching row leaves `analyses` empty, all analyses run for that clustering. Clusterings that match **no** row are excluded from Leiden and all downstream analyses — but their UMAP and neighbour graph are still computed. 
-Example plan: full analysis on Harmony at resolution 0.5, DE-only at resolution 1.0 for every integration, and DE-only for scVI at any resolution: +Example plan: full analysis on Symphony at resolution 0.5, DE-only at resolution 1.0 for every integration, and DE-only for scVI at any resolution: ```csv title="analysis_plan.csv" integration,subset,resolution,analyses -harmony,global,0.5,"paga,de,cytetype" +symphony,global,0.5,"paga,de,cytetype" ,,1.0,de scvi,,,de ``` diff --git a/main.nf b/main.nf index 7aa62964..4f032b32 100644 --- a/main.nf +++ b/main.nf @@ -61,7 +61,7 @@ workflow NFCORE_SCDOWNSTREAM { scvi_categorical_covariates // value: string scvi_continuous_covariates // value: string scimilarity_model // value: string - harmony_reference // value: string + symphony_reference // value: string expimap_gmt // value: string skip_liana // value: boolean skip_rankgenesgroups // value: boolean @@ -118,7 +118,7 @@ workflow NFCORE_SCDOWNSTREAM { scvi_categorical_covariates, scvi_continuous_covariates, scimilarity_model, - harmony_reference, + symphony_reference, expimap_gmt, skip_liana, skip_rankgenesgroups, @@ -182,8 +182,8 @@ workflow { def analysis_plan = analysisPlanToList() - def harmony_reference = params.harmony_reference - ? file(params.harmony_reference, checkIfExists: true) + def symphony_reference = params.symphony_reference + ? 
file(params.symphony_reference, checkIfExists: true) : null NFCORE_SCDOWNSTREAM ( @@ -217,7 +217,7 @@ workflow { params.scvi_categorical_covariates, params.scvi_continuous_covariates, params.scimilarity_model, - harmony_reference, + symphony_reference, params.expimap_gmt, params.skip_liana, params.skip_rankgenesgroups, diff --git a/modules/local/adata/prepcellxgene/templates/prepcellxgene.py b/modules/local/adata/prepcellxgene/templates/prepcellxgene.py index a36ffce3..7f00e6c9 100644 --- a/modules/local/adata/prepcellxgene/templates/prepcellxgene.py +++ b/modules/local/adata/prepcellxgene/templates/prepcellxgene.py @@ -17,7 +17,7 @@ adata = ad.read_h5ad("${h5ad}") -integration_methods = ["harmony", "scvi", "scanvi", "scimilarity", "seurat", "bbknn", "combat", "pca", "expimap"] +integration_methods = ["symphony", "scvi", "scanvi", "scimilarity", "seurat", "bbknn", "combat", "pca", "expimap"] for integration in integration_methods: embedding_key = f"X_{integration}" diff --git a/modules/local/symphony/mapembedding/tests/main.nf.test b/modules/local/symphony/mapembedding/tests/main.nf.test index e51fe09e..8699ba98 100644 --- a/modules/local/symphony/mapembedding/tests/main.nf.test +++ b/modules/local/symphony/mapembedding/tests/main.nf.test @@ -13,7 +13,7 @@ nextflow_process { process { """ input[0] = channel.of([ - [ id: 'harmony' ], + [ id: 'symphony' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) ] ) @@ -33,7 +33,7 @@ nextflow_process { process { """ input[0] = channel.of([ - [ id: 'harmony' ], + [ id: 'symphony' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) ] ) @@ -68,12 +68,12 @@ nextflow_process { process { """ input[0] = channel.of([ - [ id: 'harmony' ], + [ id: 'symphony' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', 
checkIfExists: true) ] ) input[1] = channel.of([ - [ id: 'harmony' ], + [ id: 'symphony' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) ] ) diff --git a/nextflow.config b/nextflow.config index f2d6c135..b94a1c3d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,7 +43,7 @@ params { scvi_model = null scanvi_model = null scimilarity_model = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz' - harmony_reference = null + symphony_reference = null expimap_gmt = null // Extension options diff --git a/nextflow_schema.json b/nextflow_schema.json index 43b1d63f..762c1fc3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -152,8 +152,8 @@ "type": "string", "default": "scvi", "description": "Specify the tool to use for integration", - "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, harmony, bbknn, combat, seurat, scimilarity, pca, expimap", - "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|harmony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$" + "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: scvi, scanvi, symphony, bbknn, combat, seurat, scimilarity, pca, expimap", + "pattern": "^((scvi|scanvi|symphony|bbknn|combat|seurat|scimilarity|pca|expimap)(,(scvi|scanvi|symphony|bbknn|combat|seurat|scimilarity|pca|expimap))*)?$" }, "integration_hvgs": { "type": "integer", @@ -167,11 +167,11 @@ "description": "Optional file containing a list of gene symbols (one per line). If provided, these genes will be excluded from highly variable genes selection for integration.", "exists": true }, - "harmony_reference": { + "symphony_reference": { "type": "string", "format": "file-path", - "description": "Path to a Symphony reference AnnData, only relevant if Harmony is selected in `integration_methods`. 
If provided, query cells will be mapped onto this reference instead of running de novo Harmony integration.", - "help_text": "The file should be in the .h5ad format. It is produced by a prior de novo Harmony run as `{outdir}/combine/integrate/harmony/harmony_reference.h5ad` and contains the compact Symphony reference metadata required for query mapping. Required for Harmony reference mapping and when extending an atlas with `--base_adata`.", + "description": "Path to a Symphony reference AnnData, only relevant if Symphony is selected in `integration_methods`. If provided, query cells will be mapped onto this reference instead of running de novo Symphony integration.", + "help_text": "The file should be in the .h5ad format. It is produced by a prior de novo Symphony run as `{outdir}/combine/integrate/symphony/symphony_reference.h5ad` and contains the compact Symphony reference metadata required for query mapping. Required for Symphony reference mapping and when extending an atlas with `--base_adata`.", "pattern": "^\\S+\\.h5ad$", "exists": true }, @@ -236,7 +236,7 @@ "type": "string", "description": "The keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`). Required if `input` is not provided - otherwise it is ignored.", "help_text": "If the `input` parameter is not provided (no new data to add), integration will not be performed. 
In order to be able to utilize existing integration results, you need to provide the keys in the obsm of the base AnnData object that contain the embeddings (without leading `X_`).", - "pattern": "^((scvi|scanvi|harmony|bbknn|combat|seurat)(,(scvi|scanvi|harmony|bbknn|combat|seurat))*)?$" + "pattern": "^((scvi|scanvi|symphony|bbknn|combat|seurat)(,(scvi|scanvi|symphony|bbknn|combat|seurat))*)?$" } } }, @@ -577,7 +577,7 @@ "type": "string", "fa_icon": "far fa-check-circle", "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/3ba0ba7174a5667fc2e005430594ffb063f986c7/", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/", "hidden": true }, "trace_report_suffix": { diff --git a/subworkflows/local/cluster/tests/main.nf.test b/subworkflows/local/cluster/tests/main.nf.test index 7bf52994..34d39524 100644 --- a/subworkflows/local/cluster/tests/main.nf.test +++ b/subworkflows/local/cluster/tests/main.nf.test @@ -292,7 +292,7 @@ nextflow_workflow { input[1] = false input[2] = true input[3] = '' - input[4] = [[integration: 'harmony', subset: null, resolution: null, analyses: null]] + input[4] = [[integration: 'symphony', subset: null, resolution: null, analyses: null]] input[5] = ['0.5', '1'] input[6] = 'sample' input[7] = 'X_scvi' diff --git a/subworkflows/local/combine/main.nf b/subworkflows/local/combine/main.nf index 80ec76c1..78c0d12a 100644 --- a/subworkflows/local/combine/main.nf +++ b/subworkflows/local/combine/main.nf @@ -17,7 +17,7 @@ workflow COMBINE { scvi_categorical_covariates // value: string scvi_continuous_covariates // value: string scimilarity_model // value: string - harmony_reference // value: string + symphony_reference // value: string expimap_gmt // value: string condition_col // value: string scib // value: boolean @@ -53,7 +53,7 @@ workflow COMBINE { scvi_categorical_covariates, 
scvi_continuous_covariates, scimilarity_model, - harmony_reference, + symphony_reference, expimap_gmt, condition_col ) diff --git a/subworkflows/local/integrate/main.nf b/subworkflows/local/integrate/main.nf index 7af3e357..9d3d7e5c 100644 --- a/subworkflows/local/integrate/main.nf +++ b/subworkflows/local/integrate/main.nf @@ -24,7 +24,7 @@ workflow INTEGRATE { scvi_categorical_covariates // list of string scvi_continuous_covariates // list of string scimilarity_model // path - harmony_reference // path + symphony_reference // path expimap_gmt // path condition_col // string @@ -113,11 +113,11 @@ workflow INTEGRATE { ch_obsm = ch_obsm.mix(SCVITOOLS_SCANVI.out.obsm) } - if (methods.contains('harmony')) { - if (harmony_reference) { + if (methods.contains('symphony')) { + if (symphony_reference) { SYMPHONY_MAPEMBEDDING ( - ch_h5ad.map { _meta, h5ad -> [[id: 'harmony'], h5ad] }, - channel.value([[id: 'harmony'], harmony_reference]), + ch_h5ad.map { _meta, h5ad -> [[id: 'symphony'], h5ad] }, + channel.value([[id: 'symphony'], symphony_reference]), "batch", "X" ) @@ -127,7 +127,7 @@ workflow INTEGRATE { } else { SYMPHONY_HARMONYINTEGRATE ( - ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'harmony'], h5ad] }, + ch_h5ad_hvg.map { _meta, h5ad -> [[id: 'symphony'], h5ad] }, "batch", "X" ) diff --git a/subworkflows/local/integrate/tests/main.nf.test b/subworkflows/local/integrate/tests/main.nf.test index e01cd89a..9ee96c5d 100644 --- a/subworkflows/local/integrate/tests/main.nf.test +++ b/subworkflows/local/integrate/tests/main.nf.test @@ -7,7 +7,7 @@ nextflow_workflow { tag "subworkflows" tag "subworkflows_local" - test("Should run without failures - harmony - stub") { + test("Should run without failures - symphony - stub") { options '-stub' @@ -24,7 +24,7 @@ nextflow_workflow { input[1] = false input[2] = 2000 input[3] = [] - input[4] = ['harmony'] + input[4] = ['symphony'] input[5] = null input[6] = null input[7] = [] @@ -44,7 +44,7 @@ nextflow_workflow { } - test("Should run 
without failures - harmony") { + test("Should run without failures - symphony") { when { params { @@ -59,7 +59,7 @@ nextflow_workflow { input[1] = false input[2] = 2000 input[3] = [] - input[4] = ['harmony'] + input[4] = ['symphony'] input[5] = null input[6] = null input[7] = [] @@ -260,7 +260,7 @@ nextflow_workflow { input[1] = true input[2] = -1 input[3] = [] - input[4] = ['harmony'] + input[4] = ['symphony'] input[5] = null input[6] = null input[7] = [] diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf index 5f4ba6eb..9e0cff62 100644 --- a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf @@ -181,8 +181,8 @@ def validateInputParameters() { } def integration_methods = params.integration_methods.split(',').collect { it -> it.trim().toLowerCase() } - if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity', 'harmony']).size() > 0) { - throw new Exception("Only scvi, scanvi, scimilarity and harmony integration methods are supported if base_adata is provided") + if (params.input && params.base_adata && (integration_methods - ['scvi', 'scanvi', 'scimilarity', 'symphony']).size() > 0) { + throw new Exception("Only scvi, scanvi, scimilarity and symphony integration methods are supported if base_adata is provided") } if (params.base_adata && 'scvi' in integration_methods && !params.scvi_model) { @@ -197,8 +197,8 @@ def validateInputParameters() { throw new Exception("If base_adata is provided and scimilarity is used as integration method, scimilarity_model must be provided.") } - if (params.base_adata && 'harmony' in integration_methods && !params.harmony_reference) { - throw new Exception("If base_adata is provided and harmony is used as integration method, harmony_reference must be provided.") + if (params.base_adata && 'symphony' in integration_methods && 
!params.symphony_reference) { + throw new Exception("If base_adata is provided and symphony is used as integration method, symphony_reference must be provided.") } // Validate sample_n and sample_fraction parameters diff --git a/tests/main_pipeline_build.nf.test b/tests/main_pipeline_build.nf.test index f07b1ebb..1932054a 100644 --- a/tests/main_pipeline_build.nf.test +++ b/tests/main_pipeline_build.nf.test @@ -9,7 +9,7 @@ nextflow_pipeline { when { params { input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet.csv' - integration_methods = 'scvi,harmony,bbknn,combat,seurat' + integration_methods = 'scvi,symphony,bbknn,combat,seurat' doublet_detection = 'scrublet,scdblfinder' celltypist_model = 'Adult_COVID19_PBMC' integration_hvgs = 500 diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf index 2528f2e7..3c99deae 100644 --- a/workflows/scdownstream.nf +++ b/workflows/scdownstream.nf @@ -59,7 +59,7 @@ workflow SCDOWNSTREAM { scvi_categorical_covariates // value: string scvi_continuous_covariates // value: string scimilarity_model // value: string - harmony_reference // value: string + symphony_reference // value: string expimap_gmt // value: string skip_liana // value: boolean skip_rankgenesgroups // value: boolean @@ -179,7 +179,7 @@ workflow SCDOWNSTREAM { scvi_categorical_covariates, scvi_continuous_covariates, scimilarity_model, - harmony_reference, + symphony_reference, expimap_gmt, condition_col, scib, From 1da576927061e24308cbb1dde2d07ca3dd293640 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Fri, 29 May 2026 19:23:12 +0200 Subject: [PATCH 08/19] Point pipeline tests at official nf-core test-datasets. Use the upstream extension_base commit for reference mapping, extension, and sub-atlas tests instead of branch URLs or the temporary fork. 
--- nextflow.config | 2 +- tests/main_pipeline_extend.nf.test | 13 +++++++------ tests/main_pipeline_reference_mapping.nf.test | 11 ++++++----- tests/main_pipeline_sub.nf.test | 5 +++-- tests/nextflow.config | 2 +- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/nextflow.config b/nextflow.config index b94a1c3d..9e612096 100644 --- a/nextflow.config +++ b/nextflow.config @@ -113,7 +113,7 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/3ba0ba7174a5667fc2e005430594ffb063f986c7/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config options config_profile_name = null diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test index 365d4b30..d549b821 100644 --- a/tests/main_pipeline_extend.nf.test +++ b/tests/main_pipeline_extend.nf.test @@ -4,19 +4,20 @@ nextflow_pipeline { script "main.nf" tag "pipeline" - test("Should perform scvi and harmony reference extension") { + test("Should perform scvi and symphony reference extension") { when { params { - input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv' - integration_methods = 'scvi,harmony' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' + input = pipelines_testdata_base_path + 'samplesheet_single.csv' + integration_methods = 'scvi,symphony' doublet_detection = 'scrublet,scdblfinder' celltypist_model = 'Adult_COVID19_PBMC' integration_hvgs = 500 outdir = "$outputDir" - scvi_model = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt' - harmony_reference = 
'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad' - base_adata = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/merged.h5ad' + scvi_model = pipelines_testdata_base_path + 'extension_base/model.pt' + symphony_reference = pipelines_testdata_base_path + 'extension_base/symphony_reference.h5ad' + base_adata = pipelines_testdata_base_path + 'extension_base/merged.h5ad' } } diff --git a/tests/main_pipeline_reference_mapping.nf.test b/tests/main_pipeline_reference_mapping.nf.test index 93d1b10f..fdaa3411 100644 --- a/tests/main_pipeline_reference_mapping.nf.test +++ b/tests/main_pipeline_reference_mapping.nf.test @@ -4,18 +4,19 @@ nextflow_pipeline { script "main.nf" tag "pipeline" - test("Should perform scvi and harmony reference mapping") { + test("Should perform scvi and symphony reference mapping") { when { params { - input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/samplesheet_single.csv' - integration_methods = 'scvi,harmony' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' + input = pipelines_testdata_base_path + 'samplesheet_single.csv' + integration_methods = 'scvi,symphony' doublet_detection = 'scrublet,scdblfinder' celltypist_model = 'Adult_COVID19_PBMC' integration_hvgs = 500 outdir = "$outputDir" - scvi_model = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/model.pt' - harmony_reference = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/harmony_reference.h5ad' + scvi_model = pipelines_testdata_base_path + 'extension_base/model.pt' + symphony_reference = pipelines_testdata_base_path + 'extension_base/symphony_reference.h5ad' } } diff --git a/tests/main_pipeline_sub.nf.test b/tests/main_pipeline_sub.nf.test index 8ede02e5..5bfb2b79 100644 --- a/tests/main_pipeline_sub.nf.test +++ 
b/tests/main_pipeline_sub.nf.test @@ -8,10 +8,11 @@ nextflow_pipeline { when { params { + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' outdir = "$outputDir" input = null - base_adata = 'https://github.com/nf-core/test-datasets/raw/refs/heads/scdownstream/extension_base/merged.h5ad' - base_embeddings = 'combat,harmony,scvi' + base_adata = pipelines_testdata_base_path + 'extension_base/merged.h5ad' + base_embeddings = 'symphony' cluster_global = false cluster_per_label = true base_label_col = 'sample' diff --git a/tests/nextflow.config b/tests/nextflow.config index aa29d8ba..71b5b6be 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -8,7 +8,7 @@ // Or any resources requirements params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' // CyteType is slow (remote LLM API); module nf-tests cover it — keep off in pipeline/subworkflow nf-tests cytetype_study_context = '' } From e3f2923666beb0a46ab1beddafccaddc443ef5d5 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Fri, 29 May 2026 19:23:20 +0200 Subject: [PATCH 09/19] Refresh nf-test snapshots after symphony rename and test-data update. Regenerated module, subworkflow, and pipeline snapshots with real H5AD outputs from the new official test datasets. 
--- .../prepcellxgene/tests/main.nf.test.snap | 212 +++++++++++++++--- .../splitembeddings/tests/main.nf.test.snap | 205 ++++++++++++++--- .../mapembedding/tests/main.nf.test.snap | 14 +- .../local/integrate/tests/main.nf.test.snap | 184 +++++++-------- tests/default.nf.test.snap | 34 +-- tests/main_pipeline_build.nf.test.snap | 34 +-- tests/main_pipeline_extend.nf.test.snap | 24 +- ...in_pipeline_reference_mapping.nf.test.snap | 24 +- tests/main_pipeline_sub.nf.test.snap | 70 ++---- 9 files changed, 541 insertions(+), 260 deletions(-) diff --git a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap index 58b78d2a..e2982530 100644 --- a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap +++ b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap @@ -40,7 +40,7 @@ { "id": "test" }, - "test.h5ad:md5,e962f73664186924dfe5269caed069bb" + "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3" ] ], "1": [ @@ -51,7 +51,7 @@ { "id": "test" }, - "test.h5ad:md5,e962f73664186924dfe5269caed069bb" + "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3" ] ], "versions": [ @@ -67,40 +67,193 @@ } }, { - "n_obs": 23364, + "n_obs": 32135, "n_vars": 9887, "obs": { "index": "_index", "columns": [ + "G2M_score", + "S_score", "batch", "bbknn-global-0.5:entropy", "bbknn-global-0.5_leiden", "bbknn-global-1.0:entropy", "bbknn-global-1.0_leiden", - "celltypist:Adult_Human_Skin", - "celltypist:Adult_Human_Skin:conf", + "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct", + "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct", + "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP", + 
"celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells", + 
"celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct", + "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct", + "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells", + 
"celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells", + "celltypist:Adult_COVID19_PBMC", + "celltypist:Adult_COVID19_PBMC:conf", "combat-global-0.5:entropy", "combat-global-0.5_leiden", "combat-global-1.0:entropy", "combat-global-1.0_leiden", "condition", - "harmony-global-0.5:entropy", - "harmony-global-0.5_leiden", - "harmony-global-1.0:entropy", - "harmony-global-1.0_leiden", + "hpca_celldex.tar.delta.next_hpca_celldex", + "hpca_celldex.tar.labels_hpca_celldex", + "hpca_celldex.tar.pruned.labels_hpca_celldex", + "hpca_celldex.tar.scores_hpca_celldex.Astrocyte", + "hpca_celldex.tar.scores_hpca_celldex.BM", + "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.", + "hpca_celldex.tar.scores_hpca_celldex.B_cell", + "hpca_celldex.tar.scores_hpca_celldex.CMP", + "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes", + "hpca_celldex.tar.scores_hpca_celldex.DC", + "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells", + "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells", + 
"hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells", + "hpca_celldex.tar.scores_hpca_celldex.Erythroblast", + "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts", + "hpca_celldex.tar.scores_hpca_celldex.GMP", + "hpca_celldex.tar.scores_hpca_celldex.Gametocytes", + "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF", + "hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.", + "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes", + "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes", + "hpca_celldex.tar.scores_hpca_celldex.MEP", + "hpca_celldex.tar.scores_hpca_celldex.MSC", + "hpca_celldex.tar.scores_hpca_celldex.Macrophage", + "hpca_celldex.tar.scores_hpca_celldex.Monocyte", + "hpca_celldex.tar.scores_hpca_celldex.Myelocyte", + "hpca_celldex.tar.scores_hpca_celldex.NK_cell", + "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell", + "hpca_celldex.tar.scores_hpca_celldex.Neurons", + "hpca_celldex.tar.scores_hpca_celldex.Neutrophils", + "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts", + "hpca_celldex.tar.scores_hpca_celldex.Platelets", + "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.", + "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.", + "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte", + "hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells", + "hpca_celldex.tar.scores_hpca_celldex.T_cells", + "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells", + "hpca_celldex.tar.scores_hpca_celldex.iPS_cells", + "immune_celldex.tar.delta.next_immune_celldex", + "immune_celldex.tar.labels_immune_celldex", + "immune_celldex.tar.pruned.labels_immune_celldex", + "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Classical.monocytes", + "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells", + "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells", + 
"immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes", + "immune_celldex.tar.scores_immune_celldex.Low.density.basophils", + "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils", + "immune_celldex.tar.scores_immune_celldex.MAIT.cells", + "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells", + "immune_celldex.tar.scores_immune_celldex.Naive.B.cells", + "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells", + "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells", + "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells", + "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes", + "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells", + "immune_celldex.tar.scores_immune_celldex.Plasmablasts", + "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells", + "immune_celldex.tar.scores_immune_celldex.Progenitor.cells", + "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells", + "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells", + "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells", + "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells", + "immune_celldex.tar.scores_immune_celldex.Th1.cells", + "immune_celldex.tar.scores_immune_celldex.Th17.cells", + "immune_celldex.tar.scores_immune_celldex.Th2.cells", + "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells", "label", "n_counts", "n_genes", "n_genes_by_counts", + "pct_counts_hb", "pct_counts_mt", + "pct_counts_ribo", + "phase", "sample", "sample_original", "scvi-global-0.5:entropy", "scvi-global-0.5_leiden", "scvi-global-1.0:entropy", "scvi-global-1.0_leiden", + "seurat-global-0.5:entropy", + "seurat-global-0.5_leiden", + "seurat-global-1.0:entropy", + "seurat-global-1.0_leiden", + "symphony-global-0.5:entropy", + 
"symphony-global-0.5_leiden", + "symphony-global-1.0:entropy", + "symphony-global-1.0_leiden", "total_counts", - "total_counts_mt" + "total_counts_hb", + "total_counts_mt", + "total_counts_ribo" ] }, "var": { @@ -115,11 +268,12 @@ "obsm": [ "X_bbknn-global_umap", "X_combat-global_umap", - "X_harmony-global_umap", "X_scvi-global_umap", + "X_seurat-global_umap", + "X_symphony-global_umap", "combat", - "harmony", - "scvi" + "scvi", + "symphony" ], "varm": [ @@ -131,32 +285,34 @@ ], "uns": [ - "bbknn-global-0.5_characteristic_genes", + "bbknn-global-0.5_leiden_characteristic_genes", "bbknn-global-0.5_paga", - "bbknn-global-1.0_characteristic_genes", + "bbknn-global-1.0_leiden_characteristic_genes", "bbknn-global-1.0_paga", - "combat-global-0.5_characteristic_genes", - "combat-global-0.5_liana", + "combat-global-0.5_leiden_characteristic_genes", "combat-global-0.5_paga", - "combat-global-1.0_characteristic_genes", - "combat-global-1.0_liana", + "combat-global-1.0_leiden_characteristic_genes", "combat-global-1.0_paga", - "harmony-global-0.5_characteristic_genes", - "harmony-global-0.5_paga", - "harmony-global-1.0_characteristic_genes", - "harmony-global-1.0_paga", "log1p", - "scvi-global-0.5_characteristic_genes", + "scvi-global-0.5_leiden_characteristic_genes", "scvi-global-0.5_paga", - "scvi-global-1.0_characteristic_genes", - "scvi-global-1.0_paga" + "scvi-global-1.0_leiden_characteristic_genes", + "scvi-global-1.0_paga", + "seurat-global-0.5_leiden_characteristic_genes", + "seurat-global-0.5_paga", + "seurat-global-1.0_leiden_characteristic_genes", + "seurat-global-1.0_paga", + "symphony-global-0.5_leiden_characteristic_genes", + "symphony-global-0.5_paga", + "symphony-global-1.0_leiden_characteristic_genes", + "symphony-global-1.0_paga" ] } ], - "timestamp": "2026-03-29T12:57:46.020211425", + "timestamp": "2026-05-29T11:36:58.839746387", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git 
a/modules/local/adata/splitembeddings/tests/main.nf.test.snap b/modules/local/adata/splitembeddings/tests/main.nf.test.snap index 911f0c48..1da9eb04 100644 --- a/modules/local/adata/splitembeddings/tests/main.nf.test.snap +++ b/modules/local/adata/splitembeddings/tests/main.nf.test.snap @@ -40,7 +40,7 @@ { "id": "test" }, - "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e" + "scvi.h5ad:md5,70a63fb030713420c635aab523701691" ] ], "1": [ @@ -51,7 +51,7 @@ { "id": "test" }, - "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e" + "scvi.h5ad:md5,70a63fb030713420c635aab523701691" ] ], "versions": [ @@ -65,40 +65,193 @@ } }, { - "n_obs": 23364, + "n_obs": 32135, "n_vars": 9887, "obs": { "index": "_index", "columns": [ + "G2M_score", + "S_score", "batch", "bbknn-global-0.5:entropy", "bbknn-global-0.5_leiden", "bbknn-global-1.0:entropy", "bbknn-global-1.0_leiden", - "celltypist:Adult_Human_Skin", - "celltypist:Adult_Human_Skin:conf", + "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct", + "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct", + "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts", + 
"celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells", + "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct", + "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct", + "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct", + 
"celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells", + 
"celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells", + "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells", + "celltypist:Adult_COVID19_PBMC", + "celltypist:Adult_COVID19_PBMC:conf", "combat-global-0.5:entropy", "combat-global-0.5_leiden", "combat-global-1.0:entropy", "combat-global-1.0_leiden", "condition", - "harmony-global-0.5:entropy", - "harmony-global-0.5_leiden", - "harmony-global-1.0:entropy", - "harmony-global-1.0_leiden", + "hpca_celldex.tar.delta.next_hpca_celldex", + "hpca_celldex.tar.labels_hpca_celldex", + "hpca_celldex.tar.pruned.labels_hpca_celldex", + "hpca_celldex.tar.scores_hpca_celldex.Astrocyte", + "hpca_celldex.tar.scores_hpca_celldex.BM", + "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.", + "hpca_celldex.tar.scores_hpca_celldex.B_cell", + "hpca_celldex.tar.scores_hpca_celldex.CMP", + "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes", + "hpca_celldex.tar.scores_hpca_celldex.DC", + "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells", + "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells", + "hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells", + "hpca_celldex.tar.scores_hpca_celldex.Erythroblast", + "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts", + "hpca_celldex.tar.scores_hpca_celldex.GMP", + "hpca_celldex.tar.scores_hpca_celldex.Gametocytes", + "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF", + 
"hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.", + "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes", + "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes", + "hpca_celldex.tar.scores_hpca_celldex.MEP", + "hpca_celldex.tar.scores_hpca_celldex.MSC", + "hpca_celldex.tar.scores_hpca_celldex.Macrophage", + "hpca_celldex.tar.scores_hpca_celldex.Monocyte", + "hpca_celldex.tar.scores_hpca_celldex.Myelocyte", + "hpca_celldex.tar.scores_hpca_celldex.NK_cell", + "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell", + "hpca_celldex.tar.scores_hpca_celldex.Neurons", + "hpca_celldex.tar.scores_hpca_celldex.Neutrophils", + "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts", + "hpca_celldex.tar.scores_hpca_celldex.Platelets", + "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.", + "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.", + "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte", + "hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells", + "hpca_celldex.tar.scores_hpca_celldex.T_cells", + "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells", + "hpca_celldex.tar.scores_hpca_celldex.iPS_cells", + "immune_celldex.tar.delta.next_immune_celldex", + "immune_celldex.tar.labels_immune_celldex", + "immune_celldex.tar.pruned.labels_immune_celldex", + "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Classical.monocytes", + "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells", + "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells", + "immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes", + "immune_celldex.tar.scores_immune_celldex.Low.density.basophils", + "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils", + "immune_celldex.tar.scores_immune_celldex.MAIT.cells", + "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells", + 
"immune_celldex.tar.scores_immune_celldex.Naive.B.cells", + "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells", + "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells", + "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells", + "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes", + "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells", + "immune_celldex.tar.scores_immune_celldex.Plasmablasts", + "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells", + "immune_celldex.tar.scores_immune_celldex.Progenitor.cells", + "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells", + "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells", + "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells", + "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells", + "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells", + "immune_celldex.tar.scores_immune_celldex.Th1.cells", + "immune_celldex.tar.scores_immune_celldex.Th17.cells", + "immune_celldex.tar.scores_immune_celldex.Th2.cells", + "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells", "label", "n_counts", "n_genes", "n_genes_by_counts", + "pct_counts_hb", "pct_counts_mt", + "pct_counts_ribo", + "phase", "sample", "sample_original", "scvi-global-0.5:entropy", "scvi-global-0.5_leiden", "scvi-global-1.0:entropy", "scvi-global-1.0_leiden", + "seurat-global-0.5:entropy", + "seurat-global-0.5_leiden", + "seurat-global-1.0:entropy", + "seurat-global-1.0_leiden", + "symphony-global-0.5:entropy", + "symphony-global-0.5_leiden", + "symphony-global-1.0:entropy", + "symphony-global-1.0_leiden", "total_counts", - "total_counts_mt" + "total_counts_hb", + "total_counts_mt", + "total_counts_ribo" ] }, "var": { @@ -123,31 +276,33 @@ ], "uns": [ - "bbknn-global-0.5_characteristic_genes", + "bbknn-global-0.5_leiden_characteristic_genes", 
"bbknn-global-0.5_paga", - "bbknn-global-1.0_characteristic_genes", + "bbknn-global-1.0_leiden_characteristic_genes", "bbknn-global-1.0_paga", - "combat-global-0.5_characteristic_genes", - "combat-global-0.5_liana", + "combat-global-0.5_leiden_characteristic_genes", "combat-global-0.5_paga", - "combat-global-1.0_characteristic_genes", - "combat-global-1.0_liana", + "combat-global-1.0_leiden_characteristic_genes", "combat-global-1.0_paga", - "harmony-global-0.5_characteristic_genes", - "harmony-global-0.5_paga", - "harmony-global-1.0_characteristic_genes", - "harmony-global-1.0_paga", - "scvi-global-0.5_characteristic_genes", + "scvi-global-0.5_leiden_characteristic_genes", "scvi-global-0.5_paga", - "scvi-global-1.0_characteristic_genes", - "scvi-global-1.0_paga" + "scvi-global-1.0_leiden_characteristic_genes", + "scvi-global-1.0_paga", + "seurat-global-0.5_leiden_characteristic_genes", + "seurat-global-0.5_paga", + "seurat-global-1.0_leiden_characteristic_genes", + "seurat-global-1.0_paga", + "symphony-global-0.5_leiden_characteristic_genes", + "symphony-global-0.5_paga", + "symphony-global-1.0_leiden_characteristic_genes", + "symphony-global-1.0_paga" ] } ], - "timestamp": "2026-03-29T14:55:42.179483745", + "timestamp": "2026-05-29T11:36:58.752520992", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/modules/local/symphony/mapembedding/tests/main.nf.test.snap b/modules/local/symphony/mapembedding/tests/main.nf.test.snap index f46df979..5588ee0b 100644 --- a/modules/local/symphony/mapembedding/tests/main.nf.test.snap +++ b/modules/local/symphony/mapembedding/tests/main.nf.test.snap @@ -5,13 +5,13 @@ "0": [ [ { - "id": "harmony" + "id": "symphony" }, - "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ - "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], "2": [ 
"versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" @@ -19,20 +19,20 @@ "h5ad": [ [ { - "id": "harmony" + "id": "symphony" }, - "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "obsm": [ - "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], "versions": [ "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": "2026-05-28T08:08:04.937392964", + "timestamp": "2026-05-28T16:31:40.664029252", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" diff --git a/subworkflows/local/integrate/tests/main.nf.test.snap b/subworkflows/local/integrate/tests/main.nf.test.snap index 17d238af..14315dc9 100644 --- a/subworkflows/local/integrate/tests/main.nf.test.snap +++ b/subworkflows/local/integrate/tests/main.nf.test.snap @@ -1,59 +1,4 @@ { - "Should run without failures - harmony - stub": { - "content": [ - { - "0": [ - [ - { - "id": "harmony" - }, - "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "4": [ - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "integrations": [ - [ - { - "id": "harmony" - }, - "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "obs": [ - - ], - "obsm": [ - "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "var": [ - - ], - "versions": [ - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", - "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - } - ], - "timestamp": "2026-03-28T23:05:37.694952307", - "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.2" - } - }, "Should run without failures - bbknn - stub": { "content": [ { @@ -239,15 +184,15 @@ "nextflow": "25.10.2" } }, - "Should run without 
failures - extension mode - stub": { + "Should run without failures - combat - stub": { "content": [ { "0": [ [ { - "id": "harmony" + "id": "combat" }, - "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -257,44 +202,48 @@ ], "3": [ - "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ], "integrations": [ [ { - "id": "harmony" + "id": "combat" }, - "harmony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "obs": [ ], "obsm": [ - "X_harmony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], "var": [ ], "versions": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" ] } ], - "timestamp": "2026-03-28T23:09:49.392744851", + "timestamp": "2026-03-25T15:49:26.334091777", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.2" } }, - "Should run without failures - pca": { + "Should run without failures - bbknn": { "content": [ [ "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae", - "versions.yml:md5,87a2cb96724430656d9c1276e91e0208", + "versions.yml:md5,ccf730637c4c61a84ac4a002bf9832e0", "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823" ], { @@ -339,39 +288,41 @@ "counts" ], "obsm": [ - "X_emb" + "X_pca" ], "varm": [ - "X_emb" + "PCs" ], "obsp": [ - + "connectivities", + "distances" ], "varp": [ ], "uns": [ - "X_emb", "hvg", - "log1p" + "log1p", + "neighbors", + "pca" ] } ], - "timestamp": "2026-05-28T14:04:36.10115423", + "timestamp": "2026-05-28T14:01:44.359301169", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" } }, - "Should run without failures - combat - stub": { + 
"Should run without failures - symphony - stub": { "content": [ { "0": [ [ { - "id": "combat" + "id": "symphony" }, - "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -381,7 +332,7 @@ ], "3": [ - "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], "4": [ "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -391,16 +342,16 @@ "integrations": [ [ { - "id": "combat" + "id": "symphony" }, - "combat.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "obs": [ ], "obsm": [ - "combat.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" ], "var": [ @@ -412,13 +363,13 @@ ] } ], - "timestamp": "2026-03-25T15:49:26.334091777", + "timestamp": "2026-05-28T16:31:59.971834646", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": "26.04.0" } }, - "Should run without failures - harmony": { + "Should run without failures - symphony": { "content": [ [ "versions.yml:md5,0941a4daea5c41d9e3259be11e9f2263", @@ -485,17 +436,68 @@ ] } ], - "timestamp": "2026-05-28T14:18:52.042984469", + "timestamp": "2026-05-28T16:32:53.466053531", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" } }, - "Should run without failures - bbknn": { + "Should run without failures - extension mode - stub": { + "content": [ + { + "0": [ + [ + { + "id": "symphony" + }, + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "4": [ + "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "integrations": [ + [ + { + "id": "symphony" + }, + "symphony.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "obs": [ + + ], + "obsm": [ + "X_symphony.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "var": [ + + ], + "versions": [ + 
"versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + } + ], + "timestamp": "2026-05-28T16:35:48.236148467", + "meta": { + "nf-test": "0.9.4", + "nextflow": "26.04.0" + } + }, + "Should run without failures - pca": { "content": [ [ "versions.yml:md5,20020d8c9cf585aaa75dd5a14aa5d3ae", - "versions.yml:md5,ccf730637c4c61a84ac4a002bf9832e0", + "versions.yml:md5,87a2cb96724430656d9c1276e91e0208", "versions.yml:md5,d28b65c4c18c54e1abc34040b584b823" ], { @@ -540,27 +542,25 @@ "counts" ], "obsm": [ - "X_pca" + "X_emb" ], "varm": [ - "PCs" + "X_emb" ], "obsp": [ - "connectivities", - "distances" + ], "varp": [ ], "uns": [ + "X_emb", "hvg", - "log1p", - "neighbors", - "pca" + "log1p" ] } ], - "timestamp": "2026-05-28T14:01:44.359301169", + "timestamp": "2026-05-28T14:04:36.10115423", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index eda1a9db..5fc73379 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -135,12 +135,6 @@ "python": "3.13.12", "scanpy": "1.12" }, - "SCANPY_HARMONY": { - "harmonypy": "0.2.0", - "pandas": "2.3.3", - "python": "3.13.12", - "scanpy": "1.12" - }, "SCANPY_HVGS": { "python": "3.13.12", "scanpy": "1.12" @@ -172,6 +166,12 @@ "SCVITOOLS_SOLO": { "scvi": "1.4.3" }, + "SYMPHONY_HARMONYINTEGRATE": { + "pandas": "2.3.3", + "python": "3.13.13", + "scanpy": "1.12.1", + "symphonypy": "0.2.4" + }, "UMAP": { "pandas": "2.3.3", "python": "3.13.12", @@ -237,14 +237,6 @@ "cluster_dimred/combat/leiden/combat-global-0.5_leiden.png", "cluster_dimred/combat/leiden/combat-global-1.0_leiden.png", "cluster_dimred/combat/umap", - "cluster_dimred/harmony", - "cluster_dimred/harmony/entropy", - "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png", - "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png", - "cluster_dimred/harmony/leiden", - "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png", - 
"cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png", - "cluster_dimred/harmony/umap", "cluster_dimred/scvi", "cluster_dimred/scvi/entropy", "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png", @@ -253,15 +245,25 @@ "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png", "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png", "cluster_dimred/scvi/umap", + "cluster_dimred/symphony", + "cluster_dimred/symphony/entropy", + "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png", + "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png", + "cluster_dimred/symphony/leiden", + "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png", + "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png", + "cluster_dimred/symphony/umap", "combine", "combine/integrate", "combine/integrate/scib_metrics", "combine/integrate/scib_metrics/combat_metrics.tsv", - "combine/integrate/scib_metrics/harmony_metrics.tsv", "combine/integrate/scib_metrics/scvi_metrics.tsv", + "combine/integrate/scib_metrics/symphony_metrics.tsv", "combine/integrate/scvi", "combine/integrate/scvi/scvi_model", "combine/integrate/scvi/scvi_model/model.pt", + "combine/integrate/symphony", + "combine/integrate/symphony/symphony_reference.h5ad", "combine/merge", "combine/merge/upset_genes.png", "finalized", @@ -305,7 +307,7 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-11T22:49:53.648324922", + "timestamp": "2026-05-28T17:09:18.020969587", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" diff --git a/tests/main_pipeline_build.nf.test.snap b/tests/main_pipeline_build.nf.test.snap index 02fca1ba..77005b00 100644 --- a/tests/main_pipeline_build.nf.test.snap +++ b/tests/main_pipeline_build.nf.test.snap @@ -135,12 +135,6 @@ "python": "3.13.12", "scanpy": "1.12" }, - "SCANPY_HARMONY": { - "harmonypy": "0.2.0", - "pandas": "2.3.3", - "python": "3.13.12", - "scanpy": "1.12" - }, "SCANPY_HVGS": { "python": "3.13.12", "scanpy": "1.12" @@ 
-174,6 +168,12 @@ "Seurat": "5.4.0", "anndataR": "1.0.2" }, + "SYMPHONY_HARMONYINTEGRATE": { + "pandas": "2.3.3", + "python": "3.13.13", + "scanpy": "1.12.1", + "symphonypy": "0.2.4" + }, "UMAP": { "pandas": "2.3.3", "python": "3.13.12", @@ -239,14 +239,6 @@ "cluster_dimred/combat/leiden/combat-global-0.5_leiden.png", "cluster_dimred/combat/leiden/combat-global-1.0_leiden.png", "cluster_dimred/combat/umap", - "cluster_dimred/harmony", - "cluster_dimred/harmony/entropy", - "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png", - "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png", - "cluster_dimred/harmony/leiden", - "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png", - "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png", - "cluster_dimred/harmony/umap", "cluster_dimred/scvi", "cluster_dimred/scvi/entropy", "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png", @@ -263,16 +255,26 @@ "cluster_dimred/seurat/leiden/seurat-global-0.5_leiden.png", "cluster_dimred/seurat/leiden/seurat-global-1.0_leiden.png", "cluster_dimred/seurat/umap", + "cluster_dimred/symphony", + "cluster_dimred/symphony/entropy", + "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png", + "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png", + "cluster_dimred/symphony/leiden", + "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png", + "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png", + "cluster_dimred/symphony/umap", "combine", "combine/integrate", "combine/integrate/scib_metrics", "combine/integrate/scib_metrics/combat_metrics.tsv", - "combine/integrate/scib_metrics/harmony_metrics.tsv", "combine/integrate/scib_metrics/scvi_metrics.tsv", "combine/integrate/scib_metrics/seurat_metrics.tsv", + "combine/integrate/scib_metrics/symphony_metrics.tsv", "combine/integrate/scvi", "combine/integrate/scvi/scvi_model", "combine/integrate/scvi/scvi_model/model.pt", + "combine/integrate/symphony", + 
"combine/integrate/symphony/symphony_reference.h5ad", "combine/merge", "combine/merge/upset_genes.png", "finalized", @@ -316,7 +318,7 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-11T23:09:11.341514791", + "timestamp": "2026-05-28T17:30:10.040067606", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" diff --git a/tests/main_pipeline_extend.nf.test.snap b/tests/main_pipeline_extend.nf.test.snap index add7d274..400a7ed3 100644 --- a/tests/main_pipeline_extend.nf.test.snap +++ b/tests/main_pipeline_extend.nf.test.snap @@ -1,5 +1,5 @@ { - "Should perform scvi and harmony reference extension": { + "Should perform scvi and symphony reference extension": { "content": [ { "ADATA_EXTEND": { @@ -181,14 +181,6 @@ "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf", "celltypes/singler/SRR28679759_singler_predictions.csv", "cluster_dimred", - "cluster_dimred/harmony", - "cluster_dimred/harmony/entropy", - "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png", - "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png", - "cluster_dimred/harmony/leiden", - "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png", - "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png", - "cluster_dimred/harmony/umap", "cluster_dimred/scvi", "cluster_dimred/scvi/entropy", "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png", @@ -197,11 +189,19 @@ "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png", "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png", "cluster_dimred/scvi/umap", + "cluster_dimred/symphony", + "cluster_dimred/symphony/entropy", + "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png", + "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png", + "cluster_dimred/symphony/leiden", + "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png", + "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png", + "cluster_dimred/symphony/umap", "combine", 
"combine/integrate", "combine/integrate/scib_metrics", - "combine/integrate/scib_metrics/harmony_metrics.tsv", "combine/integrate/scib_metrics/scvi_metrics.tsv", + "combine/integrate/scib_metrics/symphony_metrics.tsv", "combine/integrate/scvi", "combine/integrate/scvi/scvi_model", "combine/integrate/scvi/scvi_model/model.pt", @@ -243,10 +243,10 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-28T11:14:08.38204652", + "timestamp": "2026-05-29T14:41:14.623955124", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" } } -} +} \ No newline at end of file diff --git a/tests/main_pipeline_reference_mapping.nf.test.snap b/tests/main_pipeline_reference_mapping.nf.test.snap index 8353f4a6..e8795e97 100644 --- a/tests/main_pipeline_reference_mapping.nf.test.snap +++ b/tests/main_pipeline_reference_mapping.nf.test.snap @@ -1,5 +1,5 @@ { - "Should perform scvi and harmony reference mapping": { + "Should perform scvi and symphony reference mapping": { "content": [ { "ADATA_EXTEND": { @@ -181,14 +181,6 @@ "celltypes/singler/SRR28679759_singler_immune_direct_heatmap.pdf", "celltypes/singler/SRR28679759_singler_predictions.csv", "cluster_dimred", - "cluster_dimred/harmony", - "cluster_dimred/harmony/entropy", - "cluster_dimred/harmony/entropy/harmony-global-0.5_entropy.png", - "cluster_dimred/harmony/entropy/harmony-global-1.0_entropy.png", - "cluster_dimred/harmony/leiden", - "cluster_dimred/harmony/leiden/harmony-global-0.5_leiden.png", - "cluster_dimred/harmony/leiden/harmony-global-1.0_leiden.png", - "cluster_dimred/harmony/umap", "cluster_dimred/scvi", "cluster_dimred/scvi/entropy", "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png", @@ -197,11 +189,19 @@ "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png", "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png", "cluster_dimred/scvi/umap", + "cluster_dimred/symphony", + "cluster_dimred/symphony/entropy", + "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png", + 
"cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png", + "cluster_dimred/symphony/leiden", + "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png", + "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png", + "cluster_dimred/symphony/umap", "combine", "combine/integrate", "combine/integrate/scib_metrics", - "combine/integrate/scib_metrics/harmony_metrics.tsv", "combine/integrate/scib_metrics/scvi_metrics.tsv", + "combine/integrate/scib_metrics/symphony_metrics.tsv", "combine/integrate/scvi", "combine/integrate/scvi/scvi_model", "combine/integrate/scvi/scvi_model/model.pt", @@ -243,10 +243,10 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-28T10:56:43.387329548", + "timestamp": "2026-05-29T14:31:02.484302876", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" } } -} +} \ No newline at end of file diff --git a/tests/main_pipeline_sub.nf.test.snap b/tests/main_pipeline_sub.nf.test.snap index 36b85745..cb74498d 100644 --- a/tests/main_pipeline_sub.nf.test.snap +++ b/tests/main_pipeline_sub.nf.test.snap @@ -59,58 +59,24 @@ }, [ "adata", - "adata/combat.h5ad", - "adata/harmony.h5ad", - "adata/scvi.h5ad", + "adata/symphony.h5ad", "cluster_dimred", - "cluster_dimred/combat", - "cluster_dimred/combat/entropy", - "cluster_dimred/combat/entropy/combat-SRR28679756-0.5_entropy.png", - "cluster_dimred/combat/entropy/combat-SRR28679756-1.0_entropy.png", - "cluster_dimred/combat/entropy/combat-SRR28679757-0.5_entropy.png", - "cluster_dimred/combat/entropy/combat-SRR28679757-1.0_entropy.png", - "cluster_dimred/combat/entropy/combat-SRR28679758-0.5_entropy.png", - "cluster_dimred/combat/entropy/combat-SRR28679758-1.0_entropy.png", - "cluster_dimred/combat/leiden", - "cluster_dimred/combat/leiden/combat-SRR28679756-0.5_leiden.png", - "cluster_dimred/combat/leiden/combat-SRR28679756-1.0_leiden.png", - "cluster_dimred/combat/leiden/combat-SRR28679757-0.5_leiden.png", - 
"cluster_dimred/combat/leiden/combat-SRR28679757-1.0_leiden.png", - "cluster_dimred/combat/leiden/combat-SRR28679758-0.5_leiden.png", - "cluster_dimred/combat/leiden/combat-SRR28679758-1.0_leiden.png", - "cluster_dimred/combat/umap", - "cluster_dimred/harmony", - "cluster_dimred/harmony/entropy", - "cluster_dimred/harmony/entropy/harmony-SRR28679756-0.5_entropy.png", - "cluster_dimred/harmony/entropy/harmony-SRR28679756-1.0_entropy.png", - "cluster_dimred/harmony/entropy/harmony-SRR28679757-0.5_entropy.png", - "cluster_dimred/harmony/entropy/harmony-SRR28679757-1.0_entropy.png", - "cluster_dimred/harmony/entropy/harmony-SRR28679758-0.5_entropy.png", - "cluster_dimred/harmony/entropy/harmony-SRR28679758-1.0_entropy.png", - "cluster_dimred/harmony/leiden", - "cluster_dimred/harmony/leiden/harmony-SRR28679756-0.5_leiden.png", - "cluster_dimred/harmony/leiden/harmony-SRR28679756-1.0_leiden.png", - "cluster_dimred/harmony/leiden/harmony-SRR28679757-0.5_leiden.png", - "cluster_dimred/harmony/leiden/harmony-SRR28679757-1.0_leiden.png", - "cluster_dimred/harmony/leiden/harmony-SRR28679758-0.5_leiden.png", - "cluster_dimred/harmony/leiden/harmony-SRR28679758-1.0_leiden.png", - "cluster_dimred/harmony/umap", - "cluster_dimred/scvi", - "cluster_dimred/scvi/entropy", - "cluster_dimred/scvi/entropy/scvi-SRR28679756-0.5_entropy.png", - "cluster_dimred/scvi/entropy/scvi-SRR28679756-1.0_entropy.png", - "cluster_dimred/scvi/entropy/scvi-SRR28679757-0.5_entropy.png", - "cluster_dimred/scvi/entropy/scvi-SRR28679757-1.0_entropy.png", - "cluster_dimred/scvi/entropy/scvi-SRR28679758-0.5_entropy.png", - "cluster_dimred/scvi/entropy/scvi-SRR28679758-1.0_entropy.png", - "cluster_dimred/scvi/leiden", - "cluster_dimred/scvi/leiden/scvi-SRR28679756-0.5_leiden.png", - "cluster_dimred/scvi/leiden/scvi-SRR28679756-1.0_leiden.png", - "cluster_dimred/scvi/leiden/scvi-SRR28679757-0.5_leiden.png", - "cluster_dimred/scvi/leiden/scvi-SRR28679757-1.0_leiden.png", - 
"cluster_dimred/scvi/leiden/scvi-SRR28679758-0.5_leiden.png", - "cluster_dimred/scvi/leiden/scvi-SRR28679758-1.0_leiden.png", - "cluster_dimred/scvi/umap", + "cluster_dimred/symphony", + "cluster_dimred/symphony/entropy", + "cluster_dimred/symphony/entropy/symphony-SRR28679756-0.5_entropy.png", + "cluster_dimred/symphony/entropy/symphony-SRR28679756-1.0_entropy.png", + "cluster_dimred/symphony/entropy/symphony-SRR28679757-0.5_entropy.png", + "cluster_dimred/symphony/entropy/symphony-SRR28679757-1.0_entropy.png", + "cluster_dimred/symphony/entropy/symphony-SRR28679758-0.5_entropy.png", + "cluster_dimred/symphony/entropy/symphony-SRR28679758-1.0_entropy.png", + "cluster_dimred/symphony/leiden", + "cluster_dimred/symphony/leiden/symphony-SRR28679756-0.5_leiden.png", + "cluster_dimred/symphony/leiden/symphony-SRR28679756-1.0_leiden.png", + "cluster_dimred/symphony/leiden/symphony-SRR28679757-0.5_leiden.png", + "cluster_dimred/symphony/leiden/symphony-SRR28679757-1.0_leiden.png", + "cluster_dimred/symphony/leiden/symphony-SRR28679758-0.5_leiden.png", + "cluster_dimred/symphony/leiden/symphony-SRR28679758-1.0_leiden.png", + "cluster_dimred/symphony/umap", "finalized", "finalized/base.h5ad", "finalized/base.rds", @@ -145,7 +111,7 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-11T23:50:17.469316064", + "timestamp": "2026-05-29T14:32:12.630005788", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" From a7429c96c3d029476e80a113d784e7143a796ebd Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Fri, 29 May 2026 23:19:00 +0200 Subject: [PATCH 10/19] Use forked pipeline test data Point pipeline nf-tests back at the forked fixtures needed by existing tests and align the COMBINE test inputs with the current workflow signature. 
--- nextflow.config | 2 +- nextflow_schema.json | 2 +- subworkflows/local/combine/tests/main.nf.test | 10 ++++++---- tests/main_pipeline_extend.nf.test | 2 +- tests/main_pipeline_reference_mapping.nf.test | 2 +- tests/main_pipeline_sub.nf.test | 2 +- tests/nextflow.config | 2 +- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/nextflow.config b/nextflow.config index 9e612096..1bfca071 100644 --- a/nextflow.config +++ b/nextflow.config @@ -113,7 +113,7 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085/' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config options config_profile_name = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 762c1fc3..a180dba5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -577,7 +577,7 @@ "type": "string", "fa_icon": "far fa-check-circle", "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/", + "default": "https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085/", "hidden": true }, "trace_report_suffix": { diff --git a/subworkflows/local/combine/tests/main.nf.test b/subworkflows/local/combine/tests/main.nf.test index 836d14d3..e8446503 100644 --- a/subworkflows/local/combine/tests/main.nf.test +++ b/subworkflows/local/combine/tests/main.nf.test @@ -32,8 +32,9 @@ nextflow_workflow { input[9] = '' input[10] = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz' input[11] = null - input[12] = 'condition' - input[13] = false + input[12] = null + input[13] = 'condition' + input[14] = false """ } } @@ 
-68,8 +69,9 @@ nextflow_workflow { input[9] = '' input[10] = 'https://zenodo.org/records/10685499/files/model_v1.1.tar.gz' input[11] = null - input[12] = 'condition' - input[13] = false + input[12] = null + input[13] = 'condition' + input[14] = false """ } } diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test index d549b821..bc311d04 100644 --- a/tests/main_pipeline_extend.nf.test +++ b/tests/main_pipeline_extend.nf.test @@ -8,7 +8,7 @@ nextflow_pipeline { when { params { - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' input = pipelines_testdata_base_path + 'samplesheet_single.csv' integration_methods = 'scvi,symphony' doublet_detection = 'scrublet,scdblfinder' diff --git a/tests/main_pipeline_reference_mapping.nf.test b/tests/main_pipeline_reference_mapping.nf.test index fdaa3411..e7ccbe44 100644 --- a/tests/main_pipeline_reference_mapping.nf.test +++ b/tests/main_pipeline_reference_mapping.nf.test @@ -8,7 +8,7 @@ nextflow_pipeline { when { params { - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' input = pipelines_testdata_base_path + 'samplesheet_single.csv' integration_methods = 'scvi,symphony' doublet_detection = 'scrublet,scdblfinder' diff --git a/tests/main_pipeline_sub.nf.test b/tests/main_pipeline_sub.nf.test index 5bfb2b79..03758609 100644 --- a/tests/main_pipeline_sub.nf.test +++ b/tests/main_pipeline_sub.nf.test @@ -8,7 +8,7 @@ nextflow_pipeline { when { params { - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' + 
pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' outdir = "$outputDir" input = null base_adata = pipelines_testdata_base_path + 'extension_base/merged.h5ad' diff --git a/tests/nextflow.config b/tests/nextflow.config index 71b5b6be..0ab28b9b 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -8,7 +8,7 @@ // Or any resources requirements params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/e3a7f43eb802a090affac918026d2ba5dce8fcd5/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nictru/test-datasets/97addfb0946c0e51dbb70ee1391142d12e70f085/' // CyteType is slow (remote LLM API); module nf-tests cover it — keep off in pipeline/subworkflow nf-tests cytetype_study_context = '' } From 3b3d863c2979af20ef9911ea98eda44a8877aba6 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sat, 30 May 2026 23:07:50 +0200 Subject: [PATCH 11/19] Fix cellbender merge gene alignment for mixed ID columns. Support gene_ids and gene_id in filtered matrices and fall back to var index when neither is present, fixing AMBIENT_CORRECTION tests on symbol-only h5ad inputs. --- modules/nf-core/cellbender/merge/templates/merge.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/cellbender/merge/templates/merge.py b/modules/nf-core/cellbender/merge/templates/merge.py index a15ada0d..8d424df5 100644 --- a/modules/nf-core/cellbender/merge/templates/merge.py +++ b/modules/nf-core/cellbender/merge/templates/merge.py @@ -30,11 +30,14 @@ def format_yaml_like(data: dict, indent: int = 0) -> str: adata_cellbender = load_anndata_from_input_and_output("${unfiltered}", "${cellbender_h5}", analyzed_barcodes_only=False) # Subset to the barcodes and genes present in the filtered matrix. 
-# Gene symbols (var index) may not be unique, so align on Ensembl IDs. -# The filtered h5ad uses 'gene_ids'; load_anndata_from_input_and_output uses 'gene_id'. -gene_id_col = "gene_id" if "gene_id" in adata_cellbender.var.columns else adata_cellbender.var.index.name -cb_id_to_pos = {gid: i for i, gid in enumerate(adata_cellbender.var[gene_id_col])} -var_positions = [cb_id_to_pos[gid] for gid in adata.var["gene_ids"]] +# Gene symbols (var index) may not be unique, so prefer Ensembl IDs when present. +# Column names differ: 10x/readh5 uses 'gene_ids'; unify/cellbender uses 'gene_id'. +filtered_gene_id_col = next((col for col in ("gene_ids", "gene_id") if col in adata.var.columns), None) +cellbender_gene_id_col = next((col for col in ("gene_id", "gene_ids") if col in adata_cellbender.var.columns), None) +filtered_ids = adata.var[filtered_gene_id_col] if filtered_gene_id_col else adata.var.index +cellbender_ids = adata_cellbender.var[cellbender_gene_id_col] if filtered_gene_id_col and cellbender_gene_id_col else adata_cellbender.var.index +cb_id_to_pos = {gid: i for i, gid in enumerate(cellbender_ids)} +var_positions = [cb_id_to_pos[gid] for gid in filtered_ids] adata_cellbender = adata_cellbender[adata.obs_names, var_positions] if "${output_layer}" == "X": From f31f4842a399a373198ffff5d366dc068120a161 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 31 May 2026 08:51:19 +0200 Subject: [PATCH 12/19] Update SEURAT_INTEGRATION container with glmGamPoi and Seurat 5.5. Add bioconductor-glmgampoi and bump r-seurat so SCTransform uses the supported v2 backend in the Wave image. 
--- modules/local/seurat/integration/environment.yml | 3 ++- modules/local/seurat/integration/main.nf | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/local/seurat/integration/environment.yml b/modules/local/seurat/integration/environment.yml index f6bd79fc..d362a6f1 100644 --- a/modules/local/seurat/integration/environment.yml +++ b/modules/local/seurat/integration/environment.yml @@ -3,5 +3,6 @@ channels: - bioconda dependencies: - bioconda::bioconductor-anndatar=1.0.2 + - bioconda::bioconductor-glmgampoi=1.22.0 - bioconda::bioconductor-rhdf5=2.54.1 - - conda-forge::r-seurat=5.4.0 + - conda-forge::r-seurat=5.5.0 diff --git a/modules/local/seurat/integration/main.nf b/modules/local/seurat/integration/main.nf index 3cc2605c..33ff14fc 100644 --- a/modules/local/seurat/integration/main.nf +++ b/modules/local/seurat/integration/main.nf @@ -4,8 +4,8 @@ process SEURAT_INTEGRATION { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b4393c608e642b1232cd7bb84e6c5d7620c4b167462f342a4780307e5e67596b/data': - 'community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-rhdf5_r-seurat:71809468c7d8a963' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7b/7bbad8d18ada67c2ca1dfaec11c5acb0fcd355713fec10331b0e202f1d6165f1/data': + 'community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-glmgampoi_bioconductor-rhdf5_r-seurat:a0acfd4813d44adc' }" input: tuple val(meta), path(h5ad) From be87838905b71bbb9e06f452945ab24bad8b0456 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 31 May 2026 08:51:25 +0200 Subject: [PATCH 13/19] Align SEURAT_INTEGRATION nf-test with pipeline QC gene filtering. Run SCANPY_FILTER on raw counts before HVG selection and refresh integration and combat snapshots. 
--- .../scanpy/combat/tests/main.nf.test.snap | 4 +-- .../seurat/integration/tests/main.nf.test | 27 +++++++++++++++---- .../integration/tests/main.nf.test.snap | 10 +++---- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/modules/local/scanpy/combat/tests/main.nf.test.snap b/modules/local/scanpy/combat/tests/main.nf.test.snap index c3becdf6..67c00aa4 100644 --- a/modules/local/scanpy/combat/tests/main.nf.test.snap +++ b/modules/local/scanpy/combat/tests/main.nf.test.snap @@ -56,7 +56,7 @@ }, { "n_obs": 38234, - "n_vars": 100, + "n_vars": 101, "obs": { "index": "_index", "columns": [ @@ -95,7 +95,7 @@ ] } ], - "timestamp": "2026-05-11T12:33:40.280258286", + "timestamp": "2026-05-31T07:26:05.351659325", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" diff --git a/modules/local/seurat/integration/tests/main.nf.test b/modules/local/seurat/integration/tests/main.nf.test index eac29227..8fac539d 100644 --- a/modules/local/seurat/integration/tests/main.nf.test +++ b/modules/local/seurat/integration/tests/main.nf.test @@ -8,8 +8,8 @@ nextflow_process { tag "modules_local" setup { - run("SCANPY_HVGS") { - script "modules/local/scanpy/hvgs/main.nf" + run("SCANPY_FILTER", alias: "QC_FILTER") { + script "modules/local/scanpy/filter/main.nf" process { """ input[0] = channel.of([ @@ -17,12 +17,29 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/combined_filtered_matrix.h5ad', checkIfExists: true) ] ) + input[1] = "index" + input[2] = 20 + input[3] = 20 + input[4] = 50 + input[5] = 50 + input[6] = 100 + input[7] = 0 + input[8] = 100 + input[9] = [] + """ + } + } + run("SCANPY_HVGS") { + script "modules/local/scanpy/hvgs/main.nf" + process { + """ + input[0] = QC_FILTER.out.h5ad input[1] = 100 input[2] = [] """ } } - run("SCANPY_FILTER") { + run("SCANPY_FILTER", alias: "HVG_FILTER") { script "modules/local/scanpy/filter/main.nf" process { """ @@ -49,7 +66,7 @@ nextflow_process { } process { """ - input[0] = 
SCANPY_FILTER.out.h5ad + input[0] = HVG_FILTER.out.h5ad input[1] = 'sample' """ } @@ -79,7 +96,7 @@ nextflow_process { } process { """ - input[0] = SCANPY_FILTER.out.h5ad + input[0] = HVG_FILTER.out.h5ad input[1] = 'sample' """ } diff --git a/modules/local/seurat/integration/tests/main.nf.test.snap b/modules/local/seurat/integration/tests/main.nf.test.snap index 1957b1a4..cb67cdc5 100644 --- a/modules/local/seurat/integration/tests/main.nf.test.snap +++ b/modules/local/seurat/integration/tests/main.nf.test.snap @@ -37,12 +37,12 @@ { "SEURAT_INTEGRATION": { "R": "4.5.3", - "Seurat": "5.4.0", + "Seurat": "5.5.0", "anndataR": "1.0.2" } }, { - "n_obs": 27350, + "n_obs": 12381, "n_vars": 100, "obs": { "index": "_index", @@ -99,10 +99,10 @@ ] } ], - "timestamp": "2026-04-12T20:15:34.917181", + "timestamp": "2026-05-31T08:44:01.475540778", "meta": { - "nf-test": "0.9.5", - "nextflow": "25.10.4" + "nf-test": "0.9.4", + "nextflow": "26.04.0" } } } \ No newline at end of file From 150d30c088f7a4a8a077e0d303a1846746350d98 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 31 May 2026 10:15:23 +0200 Subject: [PATCH 14/19] Update test snapshots --- .../prepcellxgene/tests/main.nf.test.snap | 210 +++--------------- .../splitembeddings/tests/main.nf.test.snap | 203 ++--------------- .../local/scanpy/hvgs/tests/main.nf.test.snap | 6 +- 3 files changed, 54 insertions(+), 365 deletions(-) diff --git a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap index e2982530..8fa47513 100644 --- a/modules/local/adata/prepcellxgene/tests/main.nf.test.snap +++ b/modules/local/adata/prepcellxgene/tests/main.nf.test.snap @@ -40,7 +40,7 @@ { "id": "test" }, - "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3" + "test.h5ad:md5,1ea9af3fd7a7908e99d6a0ec04f62b89" ] ], "1": [ @@ -51,7 +51,7 @@ { "id": "test" }, - "test.h5ad:md5,e213f1b004bae37e440c83b3966890f3" + "test.h5ad:md5,1ea9af3fd7a7908e99d6a0ec04f62b89" ] ], "versions": [ @@ 
-67,193 +67,40 @@ } }, { - "n_obs": 32135, + "n_obs": 23364, "n_vars": 9887, "obs": { "index": "_index", "columns": [ - "G2M_score", - "S_score", "batch", "bbknn-global-0.5:entropy", "bbknn-global-0.5_leiden", "bbknn-global-1.0:entropy", "bbknn-global-1.0_leiden", - "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct", - "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct", - "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte", - 
"celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct", - "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct", - "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils", - 
"celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells", - "celltypist:Adult_COVID19_PBMC", - "celltypist:Adult_COVID19_PBMC:conf", + "celltypist:Adult_Human_Skin", + 
"celltypist:Adult_Human_Skin:conf", "combat-global-0.5:entropy", "combat-global-0.5_leiden", "combat-global-1.0:entropy", "combat-global-1.0_leiden", "condition", - "hpca_celldex.tar.delta.next_hpca_celldex", - "hpca_celldex.tar.labels_hpca_celldex", - "hpca_celldex.tar.pruned.labels_hpca_celldex", - "hpca_celldex.tar.scores_hpca_celldex.Astrocyte", - "hpca_celldex.tar.scores_hpca_celldex.BM", - "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.", - "hpca_celldex.tar.scores_hpca_celldex.B_cell", - "hpca_celldex.tar.scores_hpca_celldex.CMP", - "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes", - "hpca_celldex.tar.scores_hpca_celldex.DC", - "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells", - "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells", - "hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells", - "hpca_celldex.tar.scores_hpca_celldex.Erythroblast", - "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts", - "hpca_celldex.tar.scores_hpca_celldex.GMP", - "hpca_celldex.tar.scores_hpca_celldex.Gametocytes", - "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF", - "hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.", - "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes", - "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes", - "hpca_celldex.tar.scores_hpca_celldex.MEP", - "hpca_celldex.tar.scores_hpca_celldex.MSC", - "hpca_celldex.tar.scores_hpca_celldex.Macrophage", - "hpca_celldex.tar.scores_hpca_celldex.Monocyte", - "hpca_celldex.tar.scores_hpca_celldex.Myelocyte", - "hpca_celldex.tar.scores_hpca_celldex.NK_cell", - "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell", - "hpca_celldex.tar.scores_hpca_celldex.Neurons", - "hpca_celldex.tar.scores_hpca_celldex.Neutrophils", - "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts", - "hpca_celldex.tar.scores_hpca_celldex.Platelets", - "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.", - "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.", - "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte", - 
"hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells", - "hpca_celldex.tar.scores_hpca_celldex.T_cells", - "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells", - "hpca_celldex.tar.scores_hpca_celldex.iPS_cells", - "immune_celldex.tar.delta.next_immune_celldex", - "immune_celldex.tar.labels_immune_celldex", - "immune_celldex.tar.pruned.labels_immune_celldex", - "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells", - "immune_celldex.tar.scores_immune_celldex.Classical.monocytes", - "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells", - "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells", - "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells", - "immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes", - "immune_celldex.tar.scores_immune_celldex.Low.density.basophils", - "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils", - "immune_celldex.tar.scores_immune_celldex.MAIT.cells", - "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells", - "immune_celldex.tar.scores_immune_celldex.Naive.B.cells", - "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells", - "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells", - "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells", - "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells", - "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes", - "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells", - "immune_celldex.tar.scores_immune_celldex.Plasmablasts", - "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells", - "immune_celldex.tar.scores_immune_celldex.Progenitor.cells", - "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells", - "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells", - "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells", - 
"immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells", - "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells", - "immune_celldex.tar.scores_immune_celldex.Th1.cells", - "immune_celldex.tar.scores_immune_celldex.Th17.cells", - "immune_celldex.tar.scores_immune_celldex.Th2.cells", - "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells", + "harmony-global-0.5:entropy", + "harmony-global-0.5_leiden", + "harmony-global-1.0:entropy", + "harmony-global-1.0_leiden", "label", "n_counts", "n_genes", "n_genes_by_counts", - "pct_counts_hb", "pct_counts_mt", - "pct_counts_ribo", - "phase", "sample", "sample_original", "scvi-global-0.5:entropy", "scvi-global-0.5_leiden", "scvi-global-1.0:entropy", "scvi-global-1.0_leiden", - "seurat-global-0.5:entropy", - "seurat-global-0.5_leiden", - "seurat-global-1.0:entropy", - "seurat-global-1.0_leiden", - "symphony-global-0.5:entropy", - "symphony-global-0.5_leiden", - "symphony-global-1.0:entropy", - "symphony-global-1.0_leiden", "total_counts", - "total_counts_hb", - "total_counts_mt", - "total_counts_ribo" + "total_counts_mt" ] }, "var": { @@ -268,12 +115,11 @@ "obsm": [ "X_bbknn-global_umap", "X_combat-global_umap", + "X_harmony", + "X_harmony-global_umap", "X_scvi-global_umap", - "X_seurat-global_umap", - "X_symphony-global_umap", "combat", - "scvi", - "symphony" + "scvi" ], "varm": [ @@ -285,31 +131,29 @@ ], "uns": [ - "bbknn-global-0.5_leiden_characteristic_genes", + "bbknn-global-0.5_characteristic_genes", "bbknn-global-0.5_paga", - "bbknn-global-1.0_leiden_characteristic_genes", + "bbknn-global-1.0_characteristic_genes", "bbknn-global-1.0_paga", - "combat-global-0.5_leiden_characteristic_genes", + "combat-global-0.5_characteristic_genes", + "combat-global-0.5_liana", "combat-global-0.5_paga", - "combat-global-1.0_leiden_characteristic_genes", + "combat-global-1.0_characteristic_genes", + "combat-global-1.0_liana", "combat-global-1.0_paga", + "harmony-global-0.5_characteristic_genes", + 
"harmony-global-0.5_paga", + "harmony-global-1.0_characteristic_genes", + "harmony-global-1.0_paga", "log1p", - "scvi-global-0.5_leiden_characteristic_genes", + "scvi-global-0.5_characteristic_genes", "scvi-global-0.5_paga", - "scvi-global-1.0_leiden_characteristic_genes", - "scvi-global-1.0_paga", - "seurat-global-0.5_leiden_characteristic_genes", - "seurat-global-0.5_paga", - "seurat-global-1.0_leiden_characteristic_genes", - "seurat-global-1.0_paga", - "symphony-global-0.5_leiden_characteristic_genes", - "symphony-global-0.5_paga", - "symphony-global-1.0_leiden_characteristic_genes", - "symphony-global-1.0_paga" + "scvi-global-1.0_characteristic_genes", + "scvi-global-1.0_paga" ] } ], - "timestamp": "2026-05-29T11:36:58.839746387", + "timestamp": "2026-05-31T10:12:36.828571877", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" diff --git a/modules/local/adata/splitembeddings/tests/main.nf.test.snap b/modules/local/adata/splitembeddings/tests/main.nf.test.snap index 1da9eb04..7d87289d 100644 --- a/modules/local/adata/splitembeddings/tests/main.nf.test.snap +++ b/modules/local/adata/splitembeddings/tests/main.nf.test.snap @@ -40,7 +40,7 @@ { "id": "test" }, - "scvi.h5ad:md5,70a63fb030713420c635aab523701691" + "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e" ] ], "1": [ @@ -51,7 +51,7 @@ { "id": "test" }, - "scvi.h5ad:md5,70a63fb030713420c635aab523701691" + "scvi.h5ad:md5,41c46e638fbd817665eca0ce1921585e" ] ], "versions": [ @@ -65,193 +65,40 @@ } }, { - "n_obs": 32135, + "n_obs": 23364, "n_vars": 9887, "obs": { "index": "_index", "columns": [ - "G2M_score", - "S_score", "batch", "bbknn-global-0.5:entropy", "bbknn-global-0.5_leiden", "bbknn-global-1.0:entropy", "bbknn-global-1.0_leiden", - "celldex_hpca__2024.02.26_h5_se.tar.delta.next_hpca_direct", - "celldex_hpca__2024.02.26_h5_se.tar.labels_hpca_direct", - "celldex_hpca__2024.02.26_h5_se.tar.pruned.labels_hpca_direct", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Astrocyte", - 
"celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.BM...Prog.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.B_cell", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.CMP", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Chondrocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.DC", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Embryonic_stem_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Endothelial_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Epithelial_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Erythroblast", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Fibroblasts", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.GMP", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Gametocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_.G.CSF", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.HSC_CD34.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Hepatocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Keratinocytes", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MEP", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.MSC", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Macrophage", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Monocyte", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Myelocyte", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.NK_cell", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neuroepithelial_cell", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neurons", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Neutrophils", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Osteoblasts", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Platelets", - 
"celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pre.B_cell_CD34.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.B_cell_CD34.", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Pro.Myelocyte", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Smooth_muscle_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.T_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.Tissue_stem_cells", - "celldex_hpca__2024.02.26_h5_se.tar.scores_hpca_direct.iPS_cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.delta.next_immune_direct", - "celldex_monaco_immune__2024.02.26_h5_se.tar.labels_immune_direct", - "celldex_monaco_immune__2024.02.26_h5_se.tar.pruned.labels_immune_direct", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Central.memory.CD8.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Classical.monocytes", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Effector.memory.CD8.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Exhausted.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Follicular.helper.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Intermediate.monocytes", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.basophils", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Low.density.neutrophils", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.MAIT.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Myeloid.dendritic.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD4.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Naive.CD8.T.cells", - 
"celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Natural.killer.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.Vd2.gd.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.classical.monocytes", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Non.switched.memory.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmablasts", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Plasmacytoid.dendritic.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Progenitor.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Switched.memory.B.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.T.regulatory.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD4.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Terminal.effector.CD8.T.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.Th17.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th1.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th17.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Th2.cells", - "celldex_monaco_immune__2024.02.26_h5_se.tar.scores_immune_direct.Vd2.gd.T.cells", - "celltypist:Adult_COVID19_PBMC", - "celltypist:Adult_COVID19_PBMC:conf", + "celltypist:Adult_Human_Skin", + "celltypist:Adult_Human_Skin:conf", "combat-global-0.5:entropy", "combat-global-0.5_leiden", "combat-global-1.0:entropy", "combat-global-1.0_leiden", "condition", - "hpca_celldex.tar.delta.next_hpca_celldex", - "hpca_celldex.tar.labels_hpca_celldex", - "hpca_celldex.tar.pruned.labels_hpca_celldex", - "hpca_celldex.tar.scores_hpca_celldex.Astrocyte", - "hpca_celldex.tar.scores_hpca_celldex.BM", - "hpca_celldex.tar.scores_hpca_celldex.BM...Prog.", - 
"hpca_celldex.tar.scores_hpca_celldex.B_cell", - "hpca_celldex.tar.scores_hpca_celldex.CMP", - "hpca_celldex.tar.scores_hpca_celldex.Chondrocytes", - "hpca_celldex.tar.scores_hpca_celldex.DC", - "hpca_celldex.tar.scores_hpca_celldex.Embryonic_stem_cells", - "hpca_celldex.tar.scores_hpca_celldex.Endothelial_cells", - "hpca_celldex.tar.scores_hpca_celldex.Epithelial_cells", - "hpca_celldex.tar.scores_hpca_celldex.Erythroblast", - "hpca_celldex.tar.scores_hpca_celldex.Fibroblasts", - "hpca_celldex.tar.scores_hpca_celldex.GMP", - "hpca_celldex.tar.scores_hpca_celldex.Gametocytes", - "hpca_celldex.tar.scores_hpca_celldex.HSC_.G.CSF", - "hpca_celldex.tar.scores_hpca_celldex.HSC_CD34.", - "hpca_celldex.tar.scores_hpca_celldex.Hepatocytes", - "hpca_celldex.tar.scores_hpca_celldex.Keratinocytes", - "hpca_celldex.tar.scores_hpca_celldex.MEP", - "hpca_celldex.tar.scores_hpca_celldex.MSC", - "hpca_celldex.tar.scores_hpca_celldex.Macrophage", - "hpca_celldex.tar.scores_hpca_celldex.Monocyte", - "hpca_celldex.tar.scores_hpca_celldex.Myelocyte", - "hpca_celldex.tar.scores_hpca_celldex.NK_cell", - "hpca_celldex.tar.scores_hpca_celldex.Neuroepithelial_cell", - "hpca_celldex.tar.scores_hpca_celldex.Neurons", - "hpca_celldex.tar.scores_hpca_celldex.Neutrophils", - "hpca_celldex.tar.scores_hpca_celldex.Osteoblasts", - "hpca_celldex.tar.scores_hpca_celldex.Platelets", - "hpca_celldex.tar.scores_hpca_celldex.Pre.B_cell_CD34.", - "hpca_celldex.tar.scores_hpca_celldex.Pro.B_cell_CD34.", - "hpca_celldex.tar.scores_hpca_celldex.Pro.Myelocyte", - "hpca_celldex.tar.scores_hpca_celldex.Smooth_muscle_cells", - "hpca_celldex.tar.scores_hpca_celldex.T_cells", - "hpca_celldex.tar.scores_hpca_celldex.Tissue_stem_cells", - "hpca_celldex.tar.scores_hpca_celldex.iPS_cells", - "immune_celldex.tar.delta.next_immune_celldex", - "immune_celldex.tar.labels_immune_celldex", - "immune_celldex.tar.pruned.labels_immune_celldex", - "immune_celldex.tar.scores_immune_celldex.Central.memory.CD8.T.cells", - 
"immune_celldex.tar.scores_immune_celldex.Classical.monocytes", - "immune_celldex.tar.scores_immune_celldex.Effector.memory.CD8.T.cells", - "immune_celldex.tar.scores_immune_celldex.Exhausted.B.cells", - "immune_celldex.tar.scores_immune_celldex.Follicular.helper.T.cells", - "immune_celldex.tar.scores_immune_celldex.Intermediate.monocytes", - "immune_celldex.tar.scores_immune_celldex.Low.density.basophils", - "immune_celldex.tar.scores_immune_celldex.Low.density.neutrophils", - "immune_celldex.tar.scores_immune_celldex.MAIT.cells", - "immune_celldex.tar.scores_immune_celldex.Myeloid.dendritic.cells", - "immune_celldex.tar.scores_immune_celldex.Naive.B.cells", - "immune_celldex.tar.scores_immune_celldex.Naive.CD4.T.cells", - "immune_celldex.tar.scores_immune_celldex.Naive.CD8.T.cells", - "immune_celldex.tar.scores_immune_celldex.Natural.killer.cells", - "immune_celldex.tar.scores_immune_celldex.Non.Vd2.gd.T.cells", - "immune_celldex.tar.scores_immune_celldex.Non.classical.monocytes", - "immune_celldex.tar.scores_immune_celldex.Non.switched.memory.B.cells", - "immune_celldex.tar.scores_immune_celldex.Plasmablasts", - "immune_celldex.tar.scores_immune_celldex.Plasmacytoid.dendritic.cells", - "immune_celldex.tar.scores_immune_celldex.Progenitor.cells", - "immune_celldex.tar.scores_immune_celldex.Switched.memory.B.cells", - "immune_celldex.tar.scores_immune_celldex.T.regulatory.cells", - "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD4.T.cells", - "immune_celldex.tar.scores_immune_celldex.Terminal.effector.CD8.T.cells", - "immune_celldex.tar.scores_immune_celldex.Th1.Th17.cells", - "immune_celldex.tar.scores_immune_celldex.Th1.cells", - "immune_celldex.tar.scores_immune_celldex.Th17.cells", - "immune_celldex.tar.scores_immune_celldex.Th2.cells", - "immune_celldex.tar.scores_immune_celldex.Vd2.gd.T.cells", + "harmony-global-0.5:entropy", + "harmony-global-0.5_leiden", + "harmony-global-1.0:entropy", + "harmony-global-1.0_leiden", "label", "n_counts", 
"n_genes", "n_genes_by_counts", - "pct_counts_hb", "pct_counts_mt", - "pct_counts_ribo", - "phase", "sample", "sample_original", "scvi-global-0.5:entropy", "scvi-global-0.5_leiden", "scvi-global-1.0:entropy", "scvi-global-1.0_leiden", - "seurat-global-0.5:entropy", - "seurat-global-0.5_leiden", - "seurat-global-1.0:entropy", - "seurat-global-1.0_leiden", - "symphony-global-0.5:entropy", - "symphony-global-0.5_leiden", - "symphony-global-1.0:entropy", - "symphony-global-1.0_leiden", "total_counts", - "total_counts_hb", - "total_counts_mt", - "total_counts_ribo" + "total_counts_mt" ] }, "var": { @@ -276,30 +123,28 @@ ], "uns": [ - "bbknn-global-0.5_leiden_characteristic_genes", + "bbknn-global-0.5_characteristic_genes", "bbknn-global-0.5_paga", - "bbknn-global-1.0_leiden_characteristic_genes", + "bbknn-global-1.0_characteristic_genes", "bbknn-global-1.0_paga", - "combat-global-0.5_leiden_characteristic_genes", + "combat-global-0.5_characteristic_genes", + "combat-global-0.5_liana", "combat-global-0.5_paga", - "combat-global-1.0_leiden_characteristic_genes", + "combat-global-1.0_characteristic_genes", + "combat-global-1.0_liana", "combat-global-1.0_paga", - "scvi-global-0.5_leiden_characteristic_genes", + "harmony-global-0.5_characteristic_genes", + "harmony-global-0.5_paga", + "harmony-global-1.0_characteristic_genes", + "harmony-global-1.0_paga", + "scvi-global-0.5_characteristic_genes", "scvi-global-0.5_paga", - "scvi-global-1.0_leiden_characteristic_genes", - "scvi-global-1.0_paga", - "seurat-global-0.5_leiden_characteristic_genes", - "seurat-global-0.5_paga", - "seurat-global-1.0_leiden_characteristic_genes", - "seurat-global-1.0_paga", - "symphony-global-0.5_leiden_characteristic_genes", - "symphony-global-0.5_paga", - "symphony-global-1.0_leiden_characteristic_genes", - "symphony-global-1.0_paga" + "scvi-global-1.0_characteristic_genes", + "scvi-global-1.0_paga" ] } ], - "timestamp": "2026-05-29T11:36:58.752520992", + "timestamp": 
"2026-05-31T10:09:07.090064015", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" diff --git a/modules/local/scanpy/hvgs/tests/main.nf.test.snap b/modules/local/scanpy/hvgs/tests/main.nf.test.snap index e8a736f2..e6dfc237 100644 --- a/modules/local/scanpy/hvgs/tests/main.nf.test.snap +++ b/modules/local/scanpy/hvgs/tests/main.nf.test.snap @@ -154,7 +154,7 @@ }, { "n_obs": 38234, - "n_vars": 100, + "n_vars": 101, "obs": { "index": "_index", "columns": [ @@ -191,10 +191,10 @@ ] } ], - "timestamp": "2026-03-29T11:19:17.695541068", + "timestamp": "2026-05-31T10:13:07.459491579", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": "26.04.0" } } } \ No newline at end of file From b7488c78e0c8754deeba8303f269ada2c1aea9b2 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 31 May 2026 13:07:07 +0200 Subject: [PATCH 15/19] Update pipeline-level test snapshot --- tests/main_pipeline_build.nf.test.snap | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/main_pipeline_build.nf.test.snap b/tests/main_pipeline_build.nf.test.snap index 77005b00..f7cb9baa 100644 --- a/tests/main_pipeline_build.nf.test.snap +++ b/tests/main_pipeline_build.nf.test.snap @@ -165,7 +165,7 @@ }, "SEURAT_INTEGRATION": { "R": "4.5.3", - "Seurat": "5.4.0", + "Seurat": "5.5.0", "anndataR": "1.0.2" }, "SYMPHONY_HARMONYINTEGRATE": { @@ -318,7 +318,7 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-28T17:30:10.040067606", + "timestamp": "2026-05-31T12:35:21.295737307", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0" From 3f1d4e76337c5523c5dacdf7fca233186630f46c Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Sun, 31 May 2026 18:59:35 +0200 Subject: [PATCH 16/19] Improve mygene.info API failure messages in ADATA_MYGENE. Surface actionable context when mygene.info returns server or network errors instead of a raw httpx traceback. 
--- .../local/adata/mygene/templates/mygene.py | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/modules/local/adata/mygene/templates/mygene.py b/modules/local/adata/mygene/templates/mygene.py index e9050d91..ac7cbed9 100644 --- a/modules/local/adata/mygene/templates/mygene.py +++ b/modules/local/adata/mygene/templates/mygene.py @@ -6,6 +6,7 @@ os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba" import anndata as ad +import httpx import mygene import yaml @@ -21,9 +22,33 @@ ) mg = mygene.MyGeneInfo() -df_genes = mg.querymany(inputs, - scopes=["symbol", "entrezgene", "ensemblgene"], - fields="symbol", species="human", as_dataframe=True) +try: + df_genes = mg.querymany( + inputs, + scopes=["symbol", "entrezgene", "ensemblgene"], + fields="symbol", + species="human", + as_dataframe=True, + ) +except httpx.HTTPStatusError as exc: + status = exc.response.status_code + if status >= 500: + raise RuntimeError( + f"mygene.info returned HTTP {status} (server error) while mapping " + f"{len(inputs)} gene identifiers from var[{input_col!r}]. " + "The mygene.info API is temporarily unavailable or overloaded — " + "this is not caused by your input data. Re-run this process; " + "if it keeps failing, check https://mygene.info or try again later." + ) from exc + raise RuntimeError( + f"mygene.info returned HTTP {status} while mapping " + f"{len(inputs)} gene identifiers from var[{input_col!r}]." + ) from exc +except httpx.RequestError as exc: + raise RuntimeError( + f"Could not reach mygene.info while mapping {len(inputs)} gene identifiers " + f"from var[{input_col!r}]: {exc}. Check network connectivity and try again." 
+ ) from exc mapping = df_genes["symbol"].dropna().to_dict() outputs = [mapping.get(i, i) for i in inputs] From e8e002e1ff9e1b7262405a7c23f421254a1e2807 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Mon, 1 Jun 2026 21:22:56 +0200 Subject: [PATCH 17/19] Limit number of genes processed at a time in mygene --- .../local/adata/mygene/templates/mygene.py | 27 +++---------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/modules/local/adata/mygene/templates/mygene.py b/modules/local/adata/mygene/templates/mygene.py index ac7cbed9..e5d14385 100644 --- a/modules/local/adata/mygene/templates/mygene.py +++ b/modules/local/adata/mygene/templates/mygene.py @@ -6,7 +6,6 @@ os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba" import anndata as ad -import httpx import mygene import yaml @@ -22,34 +21,16 @@ ) mg = mygene.MyGeneInfo() -try: +mapping = {} +for i in range(0, len(inputs), 500): df_genes = mg.querymany( - inputs, + inputs[i : i + 500], scopes=["symbol", "entrezgene", "ensemblgene"], fields="symbol", species="human", as_dataframe=True, ) -except httpx.HTTPStatusError as exc: - status = exc.response.status_code - if status >= 500: - raise RuntimeError( - f"mygene.info returned HTTP {status} (server error) while mapping " - f"{len(inputs)} gene identifiers from var[{input_col!r}]. " - "The mygene.info API is temporarily unavailable or overloaded — " - "this is not caused by your input data. Re-run this process; " - "if it keeps failing, check https://mygene.info or try again later." - ) from exc - raise RuntimeError( - f"mygene.info returned HTTP {status} while mapping " - f"{len(inputs)} gene identifiers from var[{input_col!r}]." - ) from exc -except httpx.RequestError as exc: - raise RuntimeError( - f"Could not reach mygene.info while mapping {len(inputs)} gene identifiers " - f"from var[{input_col!r}]: {exc}. Check network connectivity and try again." 
- ) from exc -mapping = df_genes["symbol"].dropna().to_dict() + mapping.update(df_genes["symbol"].dropna().to_dict()) outputs = [mapping.get(i, i) for i in inputs] From 8631fd441eb1c2ecae9af88003e657aa29ba0776 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Mon, 1 Jun 2026 21:44:20 +0200 Subject: [PATCH 18/19] Thread params.species through ADATA_MYGENE for MyGene.info lookups. Pass species from the pipeline into unify and quality control so gene ID conversion respects the configured organism, and update tests and parameter docs accordingly. --- docs/usage.md | 47 ++++++++++--------- main.nf | 3 ++ modules/local/adata/mygene/main.nf | 1 + .../local/adata/mygene/templates/mygene.py | 2 +- modules/local/adata/mygene/tests/main.nf.test | 2 + nextflow_schema.json | 2 +- subworkflows/local/quality_control/main.nf | 4 +- .../local/quality_control/tests/main.nf.test | 5 ++ subworkflows/local/unify/main.nf | 4 +- subworkflows/local/unify/tests/main.nf.test | 6 +++ workflows/scdownstream.nf | 2 + 11 files changed, 51 insertions(+), 27 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index a7dcc04a..8447d758 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -51,29 +51,29 @@ sample3,/absolute/path/to/sample3_filtered.csv,/absolute/path/to/sample3.csv,,,, For CSV input files, specifying the `batch_col`, `label_col`, `condition_col`, and `unknown_label` columns will not have any effect, as no additional metadata is available in the CSV file. -| Column | Description | -| ------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Unique sample identifier. 
Will be added to the pipeline output objects as `sample` column. | -| `filtered` | May contain paths to `h5ad`, `h5`, `rds`, or `csv` files. `rds` files may contain any object that can be converted to a `SingleCellExperiment` using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function. `csv` files should contain a matrix with genes as columns and cells as rows. | -| `unfiltered` | Same as `filtered`, but for the unfiltered cellranger or nf-core/scrnaseq output. If not provided, only `decontX` can be used for ambient RNA removal. | -| `batch_col` | Column in the input file containing batch information. If not provided, the entire input object will be considered as one batch. If the `batch_col` is something else than `batch`, it will be renamed to `batch` during pipeline execution. | -| `symbol_col` | Column in the input file containing gene symbol information. Defaults to `index`. There are two special values that can be used: `index` and `none`. `index` will use the row names of the matrix as gene symbols. `none` will trigger the pipeline to perform gene symbol conversion using MyGene.info based on the `geneid_col`. The values from `symbol_col` will be set as the index during pipeline execution. | -| `geneid_col` | Column in the input file containing gene identifier information. Defaults to `index`. Only used if `symbol_col` is set to `none`. | -| `label_col` | Column in the input file containing cell type information. Defaults to `label`. If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. If the `label_col` is something else than `label`, it will be renamed to `label` during pipeline execution. | -| `condition_col` | Column in the input file containing condition information (e.g. disease state, treatment). If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. 
If the `condition_col` is something else than `condition`, it will be renamed to `condition` during pipeline execution. | -| `unknown_label` | Value in the `label_col` column that should be considered as unknown. Defaults to `unknown`. If the `unknown_label` is something else than `unknown`, it will be renamed to `unknown` during pipeline execution. If trying to perform integration with scANVI, more than one unique label other than `unknown` must exist in the input data. | -| `counts_layer` | Layer in the input file containing the raw counts matrix. Defaults to `X`. | -| `min_genes` | Minimum number of genes required for a cell to be considered. Defaults to `1`. | -| `min_cells` | Minimum number of cells required for a gene to be considered. Defaults to `1`. | -| `min_counts_cell` | Minimum number of counts required for a cell to be considered. Defaults to `1`. | -| `min_counts_gene` | Minimum number of counts required for a gene to be considered. Defaults to `1`. | -| `expected_cells` | Number of expected cells, used as input to CellBender for empty droplet detection. | -| `doublet_rate` | Optional expected doublet rate (0-1) for `scDblFinder`. If not provided, `scDblFinder` estimates it internally. | -| `max_mito_percentage` | Maximum percentage of mitochondrial reads for a cell to be considered. Defaults to `100`. | -| `min_ribo_percentage` | Minimum percentage of ribosomal reads for a cell to be considered. Defaults to `0`. | -| `max_hb_percentage` | Maximum percentage of haemoglobin reads for a cell to be considered. Defaults to `100`. | -| `ambient_correction` | Whether to perform ambient RNA correction for this sample. Set to `true` to use the globally configured method, `false` to skip ambient correction for this sample. Defaults to `true`. | -| `ambient_corrected_integration` | Whether to use ambient-corrected counts for integration for this sample. 
Set to `true` to use corrected counts in downstream integration, `false` to store them only as additional layers. Can override the global `--ambient_corrected_integration` parameter. Defaults to global setting. | +| Column | Description | +| ------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Unique sample identifier. Will be added to the pipeline output objects as `sample` column. | +| `filtered` | May contain paths to `h5ad`, `h5`, `rds`, or `csv` files. `rds` files may contain any object that can be converted to a `SingleCellExperiment` using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function. `csv` files should contain a matrix with genes as columns and cells as rows. | +| `unfiltered` | Same as `filtered`, but for the unfiltered cellranger or nf-core/scrnaseq output. If not provided, only `decontX` can be used for ambient RNA removal. | +| `batch_col` | Column in the input file containing batch information. If not provided, the entire input object will be considered as one batch. If the `batch_col` is something else than `batch`, it will be renamed to `batch` during pipeline execution. | +| `symbol_col` | Column in the input file containing gene symbol information. Defaults to `index`. There are two special values that can be used: `index` and `none`. `index` will use the row names of the matrix as gene symbols. `none` will trigger the pipeline to perform gene symbol conversion using MyGene.info based on the `geneid_col` and the pipeline `--species` parameter. 
The values from `symbol_col` will be set as the index during pipeline execution. | +| `geneid_col` | Column in the input file containing gene identifier information. Defaults to `index`. Only used if `symbol_col` is set to `none`. | +| `label_col` | Column in the input file containing cell type information. Defaults to `label`. If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. If the `label_col` is something else than `label`, it will be renamed to `label` during pipeline execution. | +| `condition_col` | Column in the input file containing condition information (e.g. disease state, treatment). If the column does not exist in the input object, the pipeline will create a new column and put `unknown` in it. If the `condition_col` is something else than `condition`, it will be renamed to `condition` during pipeline execution. | +| `unknown_label` | Value in the `label_col` column that should be considered as unknown. Defaults to `unknown`. If the `unknown_label` is something else than `unknown`, it will be renamed to `unknown` during pipeline execution. If trying to perform integration with scANVI, more than one unique label other than `unknown` must exist in the input data. | +| `counts_layer` | Layer in the input file containing the raw counts matrix. Defaults to `X`. | +| `min_genes` | Minimum number of genes required for a cell to be considered. Defaults to `1`. | +| `min_cells` | Minimum number of cells required for a gene to be considered. Defaults to `1`. | +| `min_counts_cell` | Minimum number of counts required for a cell to be considered. Defaults to `1`. | +| `min_counts_gene` | Minimum number of counts required for a gene to be considered. Defaults to `1`. | +| `expected_cells` | Number of expected cells, used as input to CellBender for empty droplet detection. | +| `doublet_rate` | Optional expected doublet rate (0-1) for `scDblFinder`. If not provided, `scDblFinder` estimates it internally. 
| +| `max_mito_percentage` | Maximum percentage of mitochondrial reads for a cell to be considered. Defaults to `100`. | +| `min_ribo_percentage` | Minimum percentage of ribosomal reads for a cell to be considered. Defaults to `0`. | +| `max_hb_percentage` | Maximum percentage of haemoglobin reads for a cell to be considered. Defaults to `100`. | +| `ambient_correction` | Whether to perform ambient RNA correction for this sample. Set to `true` to use the globally configured method, `false` to skip ambient correction for this sample. Defaults to `true`. | +| `ambient_corrected_integration` | Whether to use ambient-corrected counts for integration for this sample. Set to `true` to use corrected counts in downstream integration, `false` to store them only as additional layers. Can override the global `--ambient_corrected_integration` parameter. Defaults to global setting. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. @@ -185,6 +185,7 @@ nextflow run nf-core/scdownstream --input samplesheet.csv --outdir results --cel #### Species Bundled gene lists are provided for human and mouse. +`--species` also selects the MyGene.info taxonomy used when samples have `symbol_col: none` and gene identifiers are converted via MyGene.info. 
Select the appropriate species with `--species`: ```bash diff --git a/main.nf b/main.nf index 4f032b32..63a26ad8 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,7 @@ workflow NFCORE_SCDOWNSTREAM { cell_cycle_scoring // value: boolean s_genes // path: file or [] g2m_genes // path: file or [] + species // value: string qc_only // value: boolean celldex_reference // value: string celltypist_model // value: string @@ -103,6 +104,7 @@ workflow NFCORE_SCDOWNSTREAM { cell_cycle_scoring, s_genes, g2m_genes, + species, qc_only, celldex_reference, celltypist_model, @@ -202,6 +204,7 @@ workflow { params.cell_cycle_scoring, s_genes_file, g2m_genes_file, + params.species, params.qc_only, params.celldex_reference, params.celltypist_model, diff --git a/modules/local/adata/mygene/main.nf b/modules/local/adata/mygene/main.nf index d80629a4..6ab5b022 100644 --- a/modules/local/adata/mygene/main.nf +++ b/modules/local/adata/mygene/main.nf @@ -9,6 +9,7 @@ process ADATA_MYGENE { input: tuple val(meta), path(h5ad) + val(species) output: tuple val(meta), path("*.h5ad"), emit: h5ad diff --git a/modules/local/adata/mygene/templates/mygene.py b/modules/local/adata/mygene/templates/mygene.py index e5d14385..56173426 100644 --- a/modules/local/adata/mygene/templates/mygene.py +++ b/modules/local/adata/mygene/templates/mygene.py @@ -27,7 +27,7 @@ inputs[i : i + 500], scopes=["symbol", "entrezgene", "ensemblgene"], fields="symbol", - species="human", + species="${species}", as_dataframe=True, ) mapping.update(df_genes["symbol"].dropna().to_dict()) diff --git a/modules/local/adata/mygene/tests/main.nf.test b/modules/local/adata/mygene/tests/main.nf.test index 75cf7191..97ec0dc4 100644 --- a/modules/local/adata/mygene/tests/main.nf.test +++ b/modules/local/adata/mygene/tests/main.nf.test @@ -20,6 +20,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true) ] ) + input[1] = 'human' """ } } @@ -51,6 
+52,7 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true) ] ) + input[1] = 'human' """ } } diff --git a/nextflow_schema.json b/nextflow_schema.json index a180dba5..43ed239b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -86,7 +86,7 @@ "species": { "type": "string", "default": "human", - "description": "Species of the input data. Used to auto-select bundled cell cycle gene lists (assets/cell_cycle_genes/_s_genes.txt and _g2m_genes.txt). Bundled lists are provided for 'human' and 'mouse'. Ignored when --s_genes and --g2m_genes are set explicitly." + "description": "Species of the input data. Used to auto-select bundled cell cycle gene lists (assets/cell_cycle_genes/_s_genes.txt and _g2m_genes.txt) and as the MyGene.info taxonomy when converting gene identifiers (samplesheet symbol_col: none). Bundled cell cycle lists are provided for 'human' and 'mouse'. The cell cycle list auto-selection is skipped when --s_genes and --g2m_genes are set explicitly; the MyGene.info taxonomy still follows this value."
}, "cell_cycle_scoring": { "type": "boolean", diff --git a/subworkflows/local/quality_control/main.nf b/subworkflows/local/quality_control/main.nf index 0aced122..07d07524 100644 --- a/subworkflows/local/quality_control/main.nf +++ b/subworkflows/local/quality_control/main.nf @@ -31,6 +31,7 @@ workflow QUALITY_CONTROL { cell_cycle_scoring // value: boolean s_genes // path: file or [] g2m_genes // path: file or [] + species // value: string main: ch_multiqc_files = channel.empty() @@ -117,7 +118,8 @@ workflow QUALITY_CONTROL { ch_h5ad, unify_gene_symbols, duplicate_var_resolution, - aggregate_isoforms + aggregate_isoforms, + species ) ch_multiqc_files = ch_multiqc_files.mix(UNIFY.out.multiqc_files) ch_h5ad = UNIFY.out.h5ad diff --git a/subworkflows/local/quality_control/tests/main.nf.test b/subworkflows/local/quality_control/tests/main.nf.test index d582d91a..914abd60 100644 --- a/subworkflows/local/quality_control/tests/main.nf.test +++ b/subworkflows/local/quality_control/tests/main.nf.test @@ -35,6 +35,7 @@ nextflow_workflow { input[12] = false input[13] = [] input[14] = [] + input[15] = 'human' """ } } @@ -79,6 +80,7 @@ nextflow_workflow { input[12] = false input[13] = [] input[14] = [] + input[15] = 'human' """ } } @@ -122,6 +124,7 @@ nextflow_workflow { input[12] = false input[13] = [] input[14] = [] + input[15] = 'human' """ } } @@ -178,6 +181,7 @@ nextflow_workflow { input[12] = false input[13] = [] input[14] = [] + input[15] = 'human' """ } } @@ -217,6 +221,7 @@ nextflow_workflow { input[12] = true input[13] = file("${projectDir}/assets/cell_cycle_genes/human_s_genes.txt") input[14] = file("${projectDir}/assets/cell_cycle_genes/human_g2m_genes.txt") + input[15] = 'human' """ } } diff --git a/subworkflows/local/unify/main.nf b/subworkflows/local/unify/main.nf index 631c79a4..250a6b95 100644 --- a/subworkflows/local/unify/main.nf +++ b/subworkflows/local/unify/main.nf @@ -11,6 +11,7 @@ workflow UNIFY { unify_gene_symbols // value: boolean 
duplicate_var_resolution // value: string aggregate_isoforms // value: boolean + species // value: string main: ch_multiqc_files = channel.empty() @@ -21,7 +22,8 @@ workflow UNIFY { } MYGENE ( - ch_h5ad.needs_symbol_conversion + ch_h5ad.needs_symbol_conversion, + species ) ch_h5ad = ch_h5ad.has_symbol_col.mix( MYGENE.out.h5ad.map { meta, h5ad -> [meta + [symbol_col: 'symbols'], h5ad] } diff --git a/subworkflows/local/unify/tests/main.nf.test b/subworkflows/local/unify/tests/main.nf.test index a5c5cf08..35a9eb14 100644 --- a/subworkflows/local/unify/tests/main.nf.test +++ b/subworkflows/local/unify/tests/main.nf.test @@ -25,6 +25,7 @@ nextflow_workflow { input[1] = false input[2] = 'sum' input[3] = false + input[4] = 'human' """ } } @@ -52,6 +53,7 @@ nextflow_workflow { input[1] = false input[2] = 'sum' input[3] = false + input[4] = 'human' """ } } @@ -93,6 +95,7 @@ nextflow_workflow { input[1] = false input[2] = 'sum' input[3] = false + input[4] = 'human' """ } } @@ -120,6 +123,7 @@ nextflow_workflow { input[1] = false input[2] = 'sum' input[3] = false + input[4] = 'human' """ } } @@ -161,6 +165,7 @@ nextflow_workflow { input[1] = true input[2] = 'sum' input[3] = false + input[4] = 'human' """ } } @@ -188,6 +193,7 @@ nextflow_workflow { input[1] = true input[2] = 'sum' input[3] = false + input[4] = 'human' """ } } diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf index 3c99deae..e8a7c0e2 100644 --- a/workflows/scdownstream.nf +++ b/workflows/scdownstream.nf @@ -44,6 +44,7 @@ workflow SCDOWNSTREAM { cell_cycle_scoring // value: boolean s_genes // path: file or [] g2m_genes // path: file or [] + species // value: string qc_only // value: boolean celldex_reference // value: string celltypist_model // value: string @@ -129,6 +130,7 @@ workflow SCDOWNSTREAM { cell_cycle_scoring, s_genes, g2m_genes, + species, ) ch_multiqc_files = ch_multiqc_files.mix(QUALITY_CONTROL.out.multiqc_files) ch_h5ad = QUALITY_CONTROL.out.h5ad From 
cef4bcd725d7a3ca642682f74f4048cd938dec83 Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Tue, 2 Jun 2026 08:04:58 +0200 Subject: [PATCH 19/19] Add analysis plan to extension test --- assets/schema_analysis_plan.json | 12 ++++++++---- nf-test.config | 1 + .../local/utils_nfcore_scdownstream_pipeline/main.nf | 1 + tests/analysis_plan_extension.csv | 2 ++ tests/main_pipeline_extend.nf.test | 1 + tests/main_pipeline_extend.nf.test.snap | 10 +--------- 6 files changed, 14 insertions(+), 13 deletions(-) create mode 100644 tests/analysis_plan_extension.csv diff --git a/assets/schema_analysis_plan.json b/assets/schema_analysis_plan.json index 92be8712..fdb44dbc 100644 --- a/assets/schema_analysis_plan.json +++ b/assets/schema_analysis_plan.json @@ -12,27 +12,31 @@ "pattern": "^\\S*$", "default": null, "errorMessage": "Integration name cannot contain spaces", - "description": "Integration method name, or empty to match all integrations" + "description": "Integration method name, or empty to match all integrations", + "meta": ["integration"] }, "subset": { "type": "string", "pattern": "^\\S*$", "default": null, "errorMessage": "Subset cannot contain spaces", - "description": "Clustering subset (global or a label value), or empty to match all subsets" + "description": "Clustering subset (global or a label value), or empty to match all subsets", + "meta": ["subset"] }, "resolution": { "type": "number", "minimum": 0, "default": null, - "description": "Leiden resolution, or empty to match all resolutions" + "description": "Leiden resolution, or empty to match all resolutions", + "meta": ["resolution"] }, "analyses": { "type": "string", "pattern": "^(|paga|liana|de|cytetype)(,(paga|liana|de|cytetype))*$", "default": null, "errorMessage": "Analyses must be a comma-separated list of paga, liana, de, and/or cytetype", - "description": "Downstream analyses to run for matching clusterings, or empty to run all analyses" + "description": "Downstream analyses to run for matching 
clusterings, or empty to run all analyses", + "meta": ["analyses"] } } } diff --git a/nf-test.config b/nf-test.config index 52203d1e..c56bd5d9 100644 --- a/nf-test.config +++ b/nf-test.config @@ -29,6 +29,7 @@ config { 'nf-test.config', 'tests/.nftignore', 'tests/nextflow.config', + 'tests/analysis_plan_extension.csv', ] // load the necessary plugins diff --git a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf index 9e0cff62..4c98fc5c 100644 --- a/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_scdownstream_pipeline/main.nf @@ -165,6 +165,7 @@ workflow PIPELINE_COMPLETION { def analysisPlanToList() { params.analysis_plan ? samplesheetToList(params.analysis_plan, "${projectDir}/assets/schema_analysis_plan.json") + .collect { row -> row[0] } : [[integration: null, subset: null, resolution: null, analyses: null]] } diff --git a/tests/analysis_plan_extension.csv b/tests/analysis_plan_extension.csv new file mode 100644 index 00000000..dc39c197 --- /dev/null +++ b/tests/analysis_plan_extension.csv @@ -0,0 +1,2 @@ +integration,subset,resolution,analyses +scvi,global,0.5,"paga,liana,de" diff --git a/tests/main_pipeline_extend.nf.test b/tests/main_pipeline_extend.nf.test index bc311d04..daa0dd57 100644 --- a/tests/main_pipeline_extend.nf.test +++ b/tests/main_pipeline_extend.nf.test @@ -18,6 +18,7 @@ nextflow_pipeline { scvi_model = pipelines_testdata_base_path + 'extension_base/model.pt' symphony_reference = pipelines_testdata_base_path + 'extension_base/symphony_reference.h5ad' base_adata = pipelines_testdata_base_path + 'extension_base/merged.h5ad' + analysis_plan = "${projectDir}/tests/analysis_plan_extension.csv" } } diff --git a/tests/main_pipeline_extend.nf.test.snap b/tests/main_pipeline_extend.nf.test.snap index 400a7ed3..1a1be83b 100644 --- a/tests/main_pipeline_extend.nf.test.snap +++ b/tests/main_pipeline_extend.nf.test.snap @@ 
-184,18 +184,10 @@ "cluster_dimred/scvi", "cluster_dimred/scvi/entropy", "cluster_dimred/scvi/entropy/scvi-global-0.5_entropy.png", - "cluster_dimred/scvi/entropy/scvi-global-1.0_entropy.png", "cluster_dimred/scvi/leiden", "cluster_dimred/scvi/leiden/scvi-global-0.5_leiden.png", - "cluster_dimred/scvi/leiden/scvi-global-1.0_leiden.png", "cluster_dimred/scvi/umap", "cluster_dimred/symphony", - "cluster_dimred/symphony/entropy", - "cluster_dimred/symphony/entropy/symphony-global-0.5_entropy.png", - "cluster_dimred/symphony/entropy/symphony-global-1.0_entropy.png", - "cluster_dimred/symphony/leiden", - "cluster_dimred/symphony/leiden/symphony-global-0.5_leiden.png", - "cluster_dimred/symphony/leiden/symphony-global-1.0_leiden.png", "cluster_dimred/symphony/umap", "combine", "combine/integrate", @@ -243,7 +235,7 @@ "qc-report.qmd:md5,13061014a897b3fbdafd6ea3212df0e0" ] ], - "timestamp": "2026-05-29T14:41:14.623955124", + "timestamp": "2026-06-02T08:01:50.281977805", "meta": { "nf-test": "0.9.4", "nextflow": "26.04.0"