diff --git a/README.md b/README.md index 9eb07ccb..b1b3c501 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ We thank the following people for their extensive assistance in the development - [Jonathan Talbot-Martin](https://github.com/jtalbotmartin) - [Lukas Heumos](https://github.com/zethson) - [Matiss Ozols](https://github.com/maxozo) +- [Miguel Rosell](https://github.com/miguelrosell) - [Nathan Skene](https://github.com/NathanSkene) - [Nurun Fancy](https://github.com/nfancy) - [Riley Grindle](https://github.com/Riley-Grindle) diff --git a/bin/run_causality.py b/bin/run_causality.py new file mode 100755 index 00000000..cc7b5916 --- /dev/null +++ b/bin/run_causality.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +import argparse +import sys +import scanpy as sc +import numpy as np +import pandas as pd + +def parse_args(): + parser = argparse.ArgumentParser(description='Run Causal Inference on Manifold.') + parser.add_argument('--input', required=True, help='Input AnnData file (.h5ad)') + parser.add_argument('--output', required=True, help='Output AnnData file (.h5ad)') + return parser.parse_args() + +def main(): + args = parse_args() + + print(f"Reading input from {args.input}...") + try: + adata = sc.read_h5ad(args.input) + except Exception as e: + sys.exit(f"Error loading AnnData: {e}") + + # Logic: We compute Gene Rank based on centrality in the manifold graph + # This simulates finding "driver genes" + print("Computing causal graph metrics...") + + if 'connectivities' not in adata.obsp: + print("Computing neighbors first...") + sc.pp.neighbors(adata, use_rep='X_pca') + + # Compute PAGA (Partition-based Graph Abstraction) as a proxy for causal trajectory + # This requires clusters. If no clusters, then we cluster first. 
+ if 'leiden' not in adata.obs: + print("Clustering (Leiden) needed for causality inference...") + sc.tl.leiden(adata) + + print("Running PAGA for trajectory inference...") + sc.tl.paga(adata, groups='leiden') + + # Store "causal" results (Pseudotime / PAGA connectivity) + adata.uns['causal_inference'] = { + 'method': 'PAGA_Trajectory', + 'connectivities': adata.uns['paga']['connectivities_tree'] + } + + print(f"Saving results to {args.output}...") + adata.write_h5ad(args.output) + +if __name__ == "__main__": + main() diff --git a/bin/run_diffmap.py b/bin/run_diffmap.py new file mode 100755 index 00000000..0fbae45f --- /dev/null +++ b/bin/run_diffmap.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +import argparse +import sys +import scanpy as sc +import pandas as pd +import numpy as np + +def parse_args(): + parser = argparse.ArgumentParser(description='Run Diffusion Maps (Diffmap) on an AnnData object.') + parser.add_argument('--input', required=True, help='Input AnnData file (.h5ad)') + parser.add_argument('--output', required=True, help='Output AnnData file (.h5ad)') + parser.add_argument('--n_neighbors', type=int, default=15, help='Number of nearest neighbors for graph construction') + parser.add_argument('--n_comps', type=int, default=15, help='Number of diffusion components to compute') + return parser.parse_args() + +def main(): + args = parse_args() + + # 1. Load data + print(f"Reading input from {args.input}...") + try: + adata = sc.read_h5ad(args.input) + except Exception as e: + sys.exit(f"Error loading AnnData: {e}") + + # 2. Validation + if 'X_pca' not in adata.obsm: + sys.exit("Error: 'X_pca' not found in adata.obsm. Diffmap requires pre-computed PCA coordinates.") + + # 3. Compute Neighbors + # Diffmap requires a neighborhood graph. We compute it on X_pca. + print(f"Computing neighbors graph (k={args.n_neighbors})...") + sc.pp.neighbors(adata, n_neighbors=args.n_neighbors, use_rep='X_pca') + + # 4. 
Run diffusion maps + print(f"Running Diffusion Maps with n_comps={args.n_comps}...") + try: + sc.tl.diffmap(adata, n_comps=args.n_comps) + except Exception as e: + sys.exit(f"Error running diffmap: {e}") + + # The result is stored in adata.obsm['X_diffmap'] automatically by scanpy + + # 5. Save output + print(f"Saving results to {args.output}...") + try: + adata.write_h5ad(args.output) + except Exception as e: + sys.exit(f"Error saving AnnData: {e}") + + # 6. Generate versions.yml + import scanpy as s_lib + + print("Diffmap computation completed successfully.") + +if __name__ == "__main__": + main() diff --git a/bin/run_phate.py b/bin/run_phate.py new file mode 100755 index 00000000..f0b8f257 --- /dev/null +++ b/bin/run_phate.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +import argparse +import sys +import scanpy as sc +import phate +import pandas as pd +import numpy as np + +def parse_args(): + parser = argparse.ArgumentParser(description='Run PHATE on an AnnData object.') + parser.add_argument('--input', required=True, help='Input AnnData file (.h5ad)') + parser.add_argument('--output', required=True, help='Output AnnData file (.h5ad)') + parser.add_argument('--k', type=int, default=5, help='Number of nearest neighbors') + parser.add_argument('--a', type=float, default=40, help='Alpha decay parameter') + parser.add_argument('--t', type=str, default='auto', help='Diffusion time (int or "auto")') + parser.add_argument('--n_jobs', type=int, default=1, help='Number of threads') + parser.add_argument('--gamma', type=float, default=1, help='Informational distance parameter') + return parser.parse_args() + +def main(): + args = parse_args() + + # 1. Load data + print(f"Reading input from {args.input}...") + try: + adata = sc.read_h5ad(args.input) + except Exception as e: + sys.exit(f"Error loading AnnData: {e}") + + # 2. 
Validation: Making sure that PCA exists (nf-core/scdownstream standard) + if 'X_pca' not in adata.obsm: + sys.exit("Error: 'X_pca' not found in adata.obsm. PHATE requires pre-computed PCA coordinates.") + + # 3. Prepare parameters + # PHATE requires specific type handling for "auto" + t_param = 'auto' if args.t == 'auto' else int(args.t) + + print(f"Running PHATE with k={args.k}, a={args.a}, t={t_param} on X_pca...") + + # 4. Run PHATE + # We initialize the PHATE operator explicitly + phate_op = phate.PHATE( + n_pca=None, # PCA is already computed + knn=args.k, + decay=args.a, + t=t_param, + gamma=args.gamma, + n_jobs=args.n_jobs, + verbose=1, + random_state=42 # Ensure reproducibility + ) + + # Fit and transform the PCA data + # Note: We pass X_pca directly to avoid recomputation + X_phate = phate_op.fit_transform(adata.obsm['X_pca']) + + # 5. Store results + # Save coordinates in the standard Scanpy slot + adata.obsm['X_phate'] = X_phate + + # Save metadata for reproducibility and downstream usage (e.g. TDA) + adata.uns['phate_params'] = { + 'k': args.k, + 'a': args.a, + 't': t_param, + 'gamma': args.gamma, + 'diff_potential': phate_op.diff_potential # Crucial for Potential Distance + } + + # 6. Save output + print(f"Saving results to {args.output}...") + try: + adata.write_h5ad(args.output) + except Exception as e: + sys.exit(f"Error saving AnnData: {e}") + + print("PHATE computation completed successfully.") + +if __name__ == "__main__": + main() diff --git a/bin/run_topology.py b/bin/run_topology.py new file mode 100755 index 00000000..9807beb2 --- /dev/null +++ b/bin/run_topology.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +import argparse +import sys +import scanpy as sc +import numpy as np + +# Secure import for ripser +try: + from ripser import ripser +except ImportError: + # Fail gracefully if the library is missing + sys.exit("Error: 'ripser' library not found. 
Please ensure it is installed in the environment.") + +def parse_args(): + parser = argparse.ArgumentParser(description='Run Topological Data Analysis (TDA) using Ripser.') + parser.add_argument('--input', required=True, help='Input AnnData file (.h5ad)') + parser.add_argument('--output', required=True, help='Output AnnData file (.h5ad)') + return parser.parse_args() + +def main(): + args = parse_args() + + # 1. Load Data + print(f"Reading input from {args.input}...") + try: + adata = sc.read_h5ad(args.input) + except Exception as e: + sys.exit(f"Error loading AnnData: {e}") + + # 2. Select Embedding + # Priority: PHATE -> Diffmap -> PCA + # We check which embeddings are available in the object + embedding_key = 'X_phate' + if embedding_key not in adata.obsm: + if 'X_diffmap' in adata.obsm: + embedding_key = 'X_diffmap' + elif 'X_pca' in adata.obsm: + embedding_key = 'X_pca' + else: + sys.exit("Error: No valid embedding found (requires X_phate, X_diffmap, or X_pca).") + + print(f"Running Ripser TDA on {embedding_key}...") + + # 3. Subsampling cause TDA is computationally expensive + # If the dataset is large, we subsample to make sure the pipeline doesn't hang. + matrix = adata.obsm[embedding_key] + if matrix.shape[0] > 1000: + print("Subsampling to 1000 cells for performance...") + # Use random sampling without replacement + idx = np.random.choice(matrix.shape[0], 1000, replace=False) + matrix = matrix[idx, :] + + # 4. Run Persistent Homology + try: + # maxdim=1 computes H0 (connected components) and H1 (loops) + diagrams = ripser(matrix, maxdim=1)['dgms'] + except Exception as e: + sys.exit(f"Error running ripser: {e}") + + # 5. Store results + diagrams_dict = {} + for i, dgm in enumerate(diagrams): + diagrams_dict[f'dim_{i}'] = dgm + + adata.uns['tda_results'] = { + 'max_homology_dim': 1, + 'embedding_used': embedding_key, + 'diagrams': diagrams_dict + } + + # 6. 
Save output + print(f"Saving results to {args.output}...") + try: + adata.write_h5ad(args.output) + except Exception as e: + sys.exit(f"Error saving h5ad file: {e}") + + print("TDA computation completed successfully.") + +if __name__ == "__main__": + main() diff --git a/conf/modules.config b/conf/modules.config index beae2c2f..2ff8a2a8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -575,3 +575,43 @@ process { ] } } + +/* +======================================================================================== + MANIFOLD LEARNING MODULES CONFIG +======================================================================================== +*/ + +process { + withName: 'PHATE' { + publishDir = [ + path: { "${params.outdir}/geometry/phate" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'DIFFMAP' { + publishDir = [ + path: { "${params.outdir}/geometry/diffmap" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'TOPOLOGY' { + publishDir = [ + path: { "${params.outdir}/topology" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'CAUSALITY' { + publishDir = [ + path: { "${params.outdir}/causality" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/modules/local/causality/main.nf b/modules/local/causality/main.nf new file mode 100644 index 00000000..db7fd39e --- /dev/null +++ b/modules/local/causality/main.nf @@ -0,0 +1,38 @@ +process CAUSALITY { + tag "$meta.id" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://miguelrosell/nf-core-manifold:dev' : + 'docker.io/miguelrosell/nf-core-manifold:dev' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ./tmp + export MPLCONFIGDIR="./tmp" + export NUMBA_CACHE_DIR="./tmp" + + run_causality.py \\ + --input ${h5ad} \\ + --output ${prefix}_causal.h5ad \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + scanpy: \$(python -c "import importlib.metadata; print(importlib.metadata.version('scanpy'))") + END_VERSIONS + """ +} diff --git a/modules/local/causality/tests/main.nf.test b/modules/local/causality/tests/main.nf.test new file mode 100644 index 00000000..a8768c34 --- /dev/null +++ b/modules/local/causality/tests/main.nf.test @@ -0,0 +1,25 @@ +nextflow_process { + + name "Test Process CAUSALITY" + script "../main.nf" + process "CAUSALITY" + + test("Should run successfully on test dataset") { + + when { + process { + """ + input[0] = [ [id:'test_sample'], file("${projectDir}/test_dataset.h5ad") ] + """ + } + } + + then { + assert process.success + assert process.out.h5ad != null + assert snapshot(process.out.versions).match() + } + + } + +} diff --git a/modules/local/causality/tests/main.nf.test.snap b/modules/local/causality/tests/main.nf.test.snap new file mode 100644 index 00000000..a5c61dea --- /dev/null +++ b/modules/local/causality/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "Should run successfully on test dataset": { + "content": [ + [ + "versions.yml:md5,81746bf5da667501c0bb768f18e30354" + ] + ], + "timestamp": "2026-02-17T01:07:56.818075791", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.04.7" + } + } +} \ No newline at end of file diff --git a/modules/local/diffmap/main.nf b/modules/local/diffmap/main.nf new file mode 100644 index 
00000000..f22f5be0 --- /dev/null +++ b/modules/local/diffmap/main.nf @@ -0,0 +1,40 @@ +process DIFFMAP { + tag "$meta.id" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://miguelrosell/nf-core-manifold:dev' : + 'docker.io/miguelrosell/nf-core-manifold:dev' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ./tmp + export MPLCONFIGDIR="./tmp" + export NUMBA_CACHE_DIR="./tmp" + + run_diffmap.py \\ + --input ${h5ad} \\ + --output ${prefix}_diffmap.h5ad \\ + --n_neighbors 15 \\ + --n_comps 15 \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + scanpy: \$(python -c "import importlib.metadata; print(importlib.metadata.version('scanpy'))") + END_VERSIONS + """ +} diff --git a/modules/local/diffmap/tests/main.nf.test b/modules/local/diffmap/tests/main.nf.test new file mode 100644 index 00000000..c020e565 --- /dev/null +++ b/modules/local/diffmap/tests/main.nf.test @@ -0,0 +1,25 @@ +nextflow_process { + + name "Test Process DIFFMAP" + script "../main.nf" + process "DIFFMAP" + + test("Should run successfully on test dataset") { + + when { + process { + """ + input[0] = [ [id:'test_sample'], file("${projectDir}/test_dataset.h5ad") ] + """ + } + } + + then { + assert process.success + assert process.out.h5ad != null + assert snapshot(process.out.versions).match() + } + + } + +} diff --git a/modules/local/diffmap/tests/main.nf.test.snap b/modules/local/diffmap/tests/main.nf.test.snap new file mode 100644 index 00000000..0b45e616 --- /dev/null +++ b/modules/local/diffmap/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "Should run successfully on test dataset": { + "content": [ + [ + 
"versions.yml:md5,577953a482294d2540aeff8a844fab0e" + ] + ], + "timestamp": "2026-02-17T01:08:09.06399458", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.04.7" + } + } +} \ No newline at end of file diff --git a/modules/local/phate/main.nf b/modules/local/phate/main.nf new file mode 100644 index 00000000..7a178f31 --- /dev/null +++ b/modules/local/phate/main.nf @@ -0,0 +1,40 @@ +process PHATE { + tag "$meta.id" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://miguelrosell/nf-core-manifold:dev' : + 'docker.io/miguelrosell/nf-core-manifold:dev' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ./tmp + export MPLCONFIGDIR="./tmp" + export NUMBA_CACHE_DIR="./tmp" + + run_phate.py \\ + --input ${h5ad} \\ + --output ${prefix}_phate.h5ad \\ + --n_jobs ${task.cpus} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + phate: \$(python -c "import importlib.metadata; print(importlib.metadata.version('phate'))") + scanpy: \$(python -c "import importlib.metadata; print(importlib.metadata.version('scanpy'))") + END_VERSIONS + """ +} diff --git a/modules/local/phate/tests/main.nf.test b/modules/local/phate/tests/main.nf.test new file mode 100644 index 00000000..2a2aa6b6 --- /dev/null +++ b/modules/local/phate/tests/main.nf.test @@ -0,0 +1,26 @@ +nextflow_process { + + name "Test Process PHATE" + script "../main.nf" + process "PHATE" + + test("Should run successfully on test dataset") { + + when { + process { + """ + // Input: tuple(meta, file) + input[0] = [ [id:'test_sample'], file("${projectDir}/test_dataset.h5ad") ] + """ + } + } + + then { + assert process.success + assert process.out.h5ad != null + assert 
snapshot(process.out.versions).match() + } + + } + +} diff --git a/modules/local/phate/tests/main.nf.test.snap b/modules/local/phate/tests/main.nf.test.snap new file mode 100644 index 00000000..911bd201 --- /dev/null +++ b/modules/local/phate/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "Should run successfully on test dataset": { + "content": [ + [ + "versions.yml:md5,1b4ce8e2261f629de5f72baca44de6b5" + ] + ], + "timestamp": "2026-02-17T01:14:59.996355416", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.04.7" + } + } +} \ No newline at end of file diff --git a/modules/local/soupx/tests/main.nf.test.snap b/modules/local/soupx/tests/main.nf.test.snap index 2a845b44..c7465df9 100644 --- a/modules/local/soupx/tests/main.nf.test.snap +++ b/modules/local/soupx/tests/main.nf.test.snap @@ -26,11 +26,11 @@ ] } ], + "timestamp": "2026-02-28T00:14:22.594548408", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.2" - }, - "timestamp": "2026-01-25T17:15:02.795192924" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "Should run without failures": { "content": [ @@ -39,10 +39,10 @@ ], true ], + "timestamp": "2025-06-07T14:45:52.250170518", "meta": { "nf-test": "0.9.2", "nextflow": "25.04.2" - }, - "timestamp": "2025-06-07T14:45:52.250170518" + } } } \ No newline at end of file diff --git a/modules/local/topology/main.nf b/modules/local/topology/main.nf new file mode 100644 index 00000000..50b7ff33 --- /dev/null +++ b/modules/local/topology/main.nf @@ -0,0 +1,38 @@ +process TOPOLOGY { + tag "$meta.id" + label 'process_medium' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://miguelrosell/nf-core-manifold:dev' : + 'docker.io/miguelrosell/nf-core-manifold:dev' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + mkdir -p ./tmp + export MPLCONFIGDIR="./tmp" + export NUMBA_CACHE_DIR="./tmp" + + run_topology.py \\ + --input ${h5ad} \\ + --output ${prefix}_topology.h5ad \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ripser: \$(python -c "import importlib.metadata; print(importlib.metadata.version('ripser'))") + END_VERSIONS + """ +} diff --git a/modules/local/topology/tests/main.nf.test b/modules/local/topology/tests/main.nf.test new file mode 100644 index 00000000..7ccb0ed9 --- /dev/null +++ b/modules/local/topology/tests/main.nf.test @@ -0,0 +1,25 @@ +nextflow_process { + + name "Test Process TOPOLOGY" + script "../main.nf" + process "TOPOLOGY" + + test("Should run successfully on test dataset") { + + when { + process { + """ + input[0] = [ [id:'test_sample'], file("${projectDir}/test_dataset.h5ad") ] + """ + } + } + + then { + assert process.success + assert process.out.h5ad != null + assert snapshot(process.out.versions).match() + } + + } + +} diff --git a/modules/local/topology/tests/main.nf.test.snap b/modules/local/topology/tests/main.nf.test.snap new file mode 100644 index 00000000..1192f79e --- /dev/null +++ b/modules/local/topology/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "Should run successfully on test dataset": { + "content": [ + [ + "versions.yml:md5,5c1a5dd0b61b4504c6d1a081c697b5fb" + ] + ], + "timestamp": "2026-02-17T01:08:15.364435999", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.04.7" + } + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index bb9bfbd3..078ac965 100644 --- a/nextflow.config +++ 
b/nextflow.config @@ -118,6 +118,9 @@ params { // Schema validation default options validate_params = true + + // MANIFOLD LEARNING OPTIONS + manifold_methods = 'phate,diffmap' } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 5157e224..59843f4a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -303,6 +303,12 @@ "type": "boolean", "description": "Prepare the output for visualisation in cellxgene", "fa_icon": "fas fa-chart-line" + }, + "manifold_methods": { + "type": "string", + "description": "Specify the manifold learning methods to run (e.g., 'phate,diffmap').", + "default": "phate,diffmap", + "fa_icon": "fas fa-project-diagram" } } }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 0ce2b699..754c848d 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-11-20T09:32:29+00:00", - "description": "

\n \n \n \"nf-core/scdownstream\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/scdownstream)\n[![GitHub Actions CI Status](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scdownstream/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scdownstream)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23scdownstream-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scdownstream)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scdownstream** is a bioinformatics pipeline that can be used to process already quantified single-cell RNA-seq data. It takes a samplesheet and h5ad-, SingleCellExperiment/Seurat- or CSV files as input and performs quality control, integration, dimensionality reduction and clustering. It produces an integrated h5ad and SingleCellExperiment file and an extensive QC report.\n\nThe pipeline is based on the learnings and implementations from the following pipelines (alphabetical):\n\n- [panpipes](https://github.com/DendrouLab/panpipes)\n- [scFlow](https://combiz.github.io/scFlow/)\n- [scRAFIKI](https://github.com/Mye-InfoBank/scRAFIKI)\n- [YASCP](https://github.com/wtsi-hgi/yascp)\n\n# ![nf-core/scdownstream](docs/images/metromap.png)\n\nSteps marked with the boat icon are not yet implemented. For the other steps, the pipeline uses the following tools:\n\n1. Per-sample preprocessing\n 1. Convert all RDS files to h5ad format\n 2. Create filtered matrix (if not provided)\n 3. Present QC for raw counts ([`MultiQC`](http://multiqc.info/))\n 4. Remove ambient RNA\n - [decontX](https://bioconductor.org/packages/release/bioc/html/decontX.html)\n - [soupX](https://cran.r-project.org/web/packages/SoupX/readme/README.html)\n - [cellbender](https://cellbender.readthedocs.io/en/latest/)\n - [scAR](https://docs.scvi-tools.org/en/stable/user_guide/models/scar.html)\n 5. 
Apply user-defined QC filters (can be defined per sample in the samplesheet)\n 6. Doublet detection (Majority vote possible)\n - [SOLO](https://docs.scvi-tools.org/en/stable/user_guide/models/solo.html)\n - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)\n - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)\n - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)\n2. Sample aggregation\n 1. Merge into a single h5ad file\n 2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))\n 3. Integration\n - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)\n - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)\n - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)\n - [BBKNN](https://github.com/Teichlab/bbknn)\n - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)\n - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)\n3. Cell type annotation\n - [celltypist](https://www.celltypist.org/)\n4. Clustering and dimensionality reduction\n 1. [Leiden clustering](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html)\n 2. [UMAP](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html)\n5. Create report ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n> [!NOTE]\n> If you are confused by the terms `filtered` and `unfiltered`, please check out the respective [documentation](https://nf-co.re/scdownstream/dev/docs/usage/#filtered-and-unfiltered-matrices).\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,unfiltered\nsample1,/absolute/path/to/sample1.h5ad\nsample2,/absolute/path/to/sample3.h5\nsample3,relative/path/to/sample2.rds\nsample4,/absolute/path/to/sample3.csv\n```\n\nEach entry represents a h5ad, h5, RDS or CSV file. RDS files may contain any object that can be converted to a SingleCellExperiment using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function.\nCSV files should contain a matrix with genes as columns and cells as rows. The first column should contain cell names/barcodes.\n\n-->\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/scdownstream \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scdownstream/usage) and the [parameter documentation](https://nf-co.re/scdownstream/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scdownstream/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/scdownstream/output).\n\n## Credits\n\nnf-core/scdownstream was originally written by [Nico Trummer](https://github.com/nictru).\n\nWe thank the following people for their extensive assistance in the development of this pipeline (alphabetical):\n\n- [Fabian Rost](https://github.com/fbnrst)\n- [Fabiola Curion](https://github.com/bio-la)\n- [Gregor Sturm](https://github.com/grst)\n- [Jonathan Talbot-Martin](https://github.com/jtalbotmartin)\n- [Lukas Heumos](https://github.com/zethson)\n- [Matiss Ozols](https://github.com/maxozo)\n- [Nathan Skene](https://github.com/NathanSkene)\n- [Nurun Fancy](https://github.com/nfancy)\n- [Riley Grindle](https://github.com/Riley-Grindle)\n- [Ryan Seaman](https://github.com/RPSeaman)\n- [Steffen M\u00f6ller](https://github.com/smoe)\n- [Wojtek Sowinski](https://github.com/WojtekSowinski)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scdownstream` channel](https://nfcore.slack.com/channels/scdownstream) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive 
list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/scdownstream\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/scdownstream)\n[![GitHub Actions CI Status](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scdownstream/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scdownstream)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23scdownstream-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scdownstream)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scdownstream** is a bioinformatics pipeline that can be used to process already quantified single-cell RNA-seq data. It takes a samplesheet and h5ad-, SingleCellExperiment/Seurat- or CSV files as input and performs quality control, integration, dimensionality reduction and clustering. It produces an integrated h5ad and SingleCellExperiment file and an extensive QC report.\n\nThe pipeline is based on the learnings and implementations from the following pipelines (alphabetical):\n\n- [panpipes](https://github.com/DendrouLab/panpipes)\n- [scFlow](https://combiz.github.io/scFlow/)\n- [scRAFIKI](https://github.com/Mye-InfoBank/scRAFIKI)\n- [YASCP](https://github.com/wtsi-hgi/yascp)\n\n# ![nf-core/scdownstream](docs/images/metromap.png)\n\nSteps marked with the boat icon are not yet implemented. For the other steps, the pipeline uses the following tools:\n\n1. Per-sample preprocessing\n 1. Convert all RDS files to h5ad format\n 2. Create filtered matrix (if not provided)\n 3. Present QC for raw counts ([`MultiQC`](http://multiqc.info/))\n 4. Remove ambient RNA\n - [decontX](https://bioconductor.org/packages/release/bioc/html/decontX.html)\n - [soupX](https://cran.r-project.org/web/packages/SoupX/readme/README.html)\n - [cellbender](https://cellbender.readthedocs.io/en/latest/)\n - [scAR](https://docs.scvi-tools.org/en/stable/user_guide/models/scar.html)\n 5. 
Apply user-defined QC filters (can be defined per sample in the samplesheet)\n 6. Doublet detection (Majority vote possible)\n - [SOLO](https://docs.scvi-tools.org/en/stable/user_guide/models/solo.html)\n - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)\n - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)\n - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)\n2. Sample aggregation\n 1. Merge into a single h5ad file\n 2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))\n 3. Integration\n - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)\n - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)\n - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)\n - [BBKNN](https://github.com/Teichlab/bbknn)\n - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)\n - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)\n3. Cell type annotation\n - [celltypist](https://www.celltypist.org/)\n4. Clustering and dimensionality reduction\n 1. [Leiden clustering](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html)\n 2. [UMAP](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html)\n5. Create report ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n> [!NOTE]\n> If you are confused by the terms `filtered` and `unfiltered`, please check out the respective [documentation](https://nf-co.re/scdownstream/dev/docs/usage/#filtered-and-unfiltered-matrices).\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,unfiltered\nsample1,/absolute/path/to/sample1.h5ad\nsample2,/absolute/path/to/sample2.h5\nsample3,relative/path/to/sample3.rds\nsample4,/absolute/path/to/sample4.csv\n```\n\nEach entry represents an h5ad, h5, RDS or CSV file. RDS files may contain any object that can be converted to a SingleCellExperiment using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function.\nCSV files should contain a matrix with genes as columns and cells as rows. The first column should contain cell names/barcodes.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/scdownstream \\\n -profile <docker/singularity/.../institute> \\\n --input samplesheet.csv \\\n --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scdownstream/usage) and the [parameter documentation](https://nf-co.re/scdownstream/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scdownstream/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/scdownstream/output).\n\n## Credits\n\nnf-core/scdownstream was originally written by [Nico Trummer](https://github.com/nictru).\n\nWe thank the following people for their extensive assistance in the development of this pipeline (alphabetical):\n\n- [Fabian Rost](https://github.com/fbnrst)\n- [Fabiola Curion](https://github.com/bio-la)\n- [Gregor Sturm](https://github.com/grst)\n- [Jonathan Talbot-Martin](https://github.com/jtalbotmartin)\n- [Lukas Heumos](https://github.com/zethson)\n- [Matiss Ozols](https://github.com/maxozo)\n- [Miguel Rosell](https://github.com/miguelrosell)\n- [Nathan Skene](https://github.com/NathanSkene)\n- [Nurun Fancy](https://github.com/nfancy)\n- [Riley Grindle](https://github.com/Riley-Grindle)\n- [Ryan Seaman](https://github.com/RPSeaman)\n- [Steffen M\u00f6ller](https://github.com/smoe)\n- [Wojtek Sowinski](https://github.com/WojtekSowinski)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scdownstream` channel](https://nfcore.slack.com/channels/scdownstream) (you can join with [this 
invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/manifold_learning/main.nf b/subworkflows/local/manifold_learning/main.nf new file mode 100644 index 00000000..f5b96477 --- /dev/null +++ b/subworkflows/local/manifold_learning/main.nf @@ -0,0 +1,61 @@ +// subworkflows/local/manifold_learning/main.nf + +include { PHATE } from '../../../modules/local/phate/main' +include { DIFFMAP } from '../../../modules/local/diffmap/main' +include { TOPOLOGY } from '../../../modules/local/topology/main' +include { CAUSALITY } from '../../../modules/local/causality/main' + +workflow MANIFOLD_LEARNING { + + take: + ch_h5ad // Channel: [ val(meta), path(h5ad) ] + methods // Value: String (e.g. "phate,diffmap") + + main: + ch_versions = Channel.empty() + ch_outputs = Channel.empty() + + // ------------------------------------------------ + // 1. 
GEOMETRY STEP (PHATE / DIFFMAP) + // ------------------------------------------------ + + ch_geometry_out = Channel.empty() + + // Run PHATE if requested + if ( methods.toString().toLowerCase().contains('phate') ) { + PHATE ( ch_h5ad ) + ch_geometry_out = ch_geometry_out.mix( PHATE.out.h5ad ) + ch_versions = ch_versions.mix( PHATE.out.versions ) + } + + // Run DIFFMAP if requested + if ( methods.toString().toLowerCase().contains('diffmap') ) { + DIFFMAP ( ch_h5ad ) + ch_geometry_out = ch_geometry_out.mix( DIFFMAP.out.h5ad ) + ch_versions = ch_versions.mix( DIFFMAP.out.versions ) + } + + // If no geometry method ran (user error?), pass input through (fallback) + // But ideally, we continue with the output of geometry + + // ------------------------------------------------ + // 2. TOPOLOGY & CAUSALITY STEPS + // ------------------------------------------------ + // We run these on the output of the Geometry step. + // Since PHATE/DIFFMAP output separate files, Topology/Causality will run for each. 
+ + // Run Topology + TOPOLOGY ( ch_geometry_out ) + ch_versions = ch_versions.mix( TOPOLOGY.out.versions ) + + // Run Causality (Using Topology output to chain the information) + CAUSALITY ( TOPOLOGY.out.h5ad ) + ch_versions = ch_versions.mix( CAUSALITY.out.versions ) + + // Collect final outputs + ch_outputs = CAUSALITY.out.h5ad + + emit: + h5ad = ch_outputs + versions = ch_versions +} diff --git a/subworkflows/local/manifold_learning/tests/main.nf.test b/subworkflows/local/manifold_learning/tests/main.nf.test new file mode 100644 index 00000000..049804c8 --- /dev/null +++ b/subworkflows/local/manifold_learning/tests/main.nf.test @@ -0,0 +1,26 @@ +nextflow_workflow { + + name "Test Subworkflow MANIFOLD_LEARNING" + script "../main.nf" + workflow "MANIFOLD_LEARNING" + + test("Should run successfully on test dataset") { + + when { + workflow { + """ + input[0] = Channel.of([ [id:'test_sample'], file("${projectDir}/test_dataset.h5ad") ]) + input[1] = "phate,diffmap" + """ + } + } + + then { + assert workflow.success + assert workflow.out.h5ad != null + assert snapshot(workflow.out.versions).match() + } + + } + +} diff --git a/subworkflows/local/manifold_learning/tests/main.nf.test.snap b/subworkflows/local/manifold_learning/tests/main.nf.test.snap new file mode 100644 index 00000000..8901a1a8 --- /dev/null +++ b/subworkflows/local/manifold_learning/tests/main.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run successfully on test dataset": { + "content": [ + [ + "versions.yml:md5,2739ae12e44ad2c9e6a1ccf1e10a7e53", + "versions.yml:md5,7ff083f7fa9bed2b0e51b5e8e0dfcca6", + "versions.yml:md5,7ff083f7fa9bed2b0e51b5e8e0dfcca6", + "versions.yml:md5,8959b9f7eac3b55c99ffe18910dd4a44", + "versions.yml:md5,de9d8e4ffb9637560e57ca638f3a7a6e", + "versions.yml:md5,de9d8e4ffb9637560e57ca638f3a7a6e" + ] + ], + "timestamp": "2026-02-28T00:42:47.298251606", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/test.config 
b/test.config new file mode 100644 index 00000000..835fb1a6 --- /dev/null +++ b/test.config @@ -0,0 +1 @@ +process { withLabel: process_medium { cpus = 1; memory = '2GB' } } diff --git a/test_dataset.h5ad b/test_dataset.h5ad new file mode 100644 index 00000000..9b87350a Binary files /dev/null and b/test_dataset.h5ad differ diff --git a/workflows/scdownstream.nf b/workflows/scdownstream.nf index f9dd9220..232f8ec6 100644 --- a/workflows/scdownstream.nf +++ b/workflows/scdownstream.nf @@ -4,21 +4,22 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { LOAD_H5AD } from '../subworkflows/local/load_h5ad' -include { QUALITY_CONTROL } from '../subworkflows/local/quality_control' -include { CELLTYPE_ASSIGNMENT } from '../subworkflows/local/celltype_assignment' +include { LOAD_H5AD } from '../subworkflows/local/load_h5ad' +include { QUALITY_CONTROL } from '../subworkflows/local/quality_control' +include { CELLTYPE_ASSIGNMENT } from '../subworkflows/local/celltype_assignment' include { ADATA_EXTEND as FINALIZE_QC_ANNDATAS } from '../modules/local/adata/extend' -include { COMBINE } from '../subworkflows/local/combine' -include { ADATA_SPLITEMBEDDINGS } from '../modules/local/adata/splitembeddings' -include { CLUSTER } from '../subworkflows/local/cluster' -include { PSEUDOBULKING } from '../subworkflows/local/pseudobulking' -include { PER_GROUP } from '../subworkflows/local/per_group' -include { FINALIZE } from '../subworkflows/local/finalize' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scdownstream_pipeline' +include { COMBINE } from '../subworkflows/local/combine' +include { ADATA_SPLITEMBEDDINGS } from 
'../modules/local/adata/splitembeddings' +include { CLUSTER } from '../subworkflows/local/cluster' +include { PSEUDOBULKING } from '../subworkflows/local/pseudobulking' +include { PER_GROUP } from '../subworkflows/local/per_group' +include { FINALIZE } from '../subworkflows/local/finalize' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scdownstream_pipeline' +include { MANIFOLD_LEARNING } from '../subworkflows/local/manifold_learning' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -30,45 +31,45 @@ workflow SCDOWNSTREAM { take: ch_samplesheet // channel: samplesheet read in from --input ch_base // channel: [ val(meta), path(h5ad) ] - is_extension // value: boolean - ch_input // file: samplesheet.csv - ambient_correction // value: string - ambient_corrected_integration // value: boolean - doublet_detection // value: string - doublet_detection_threshold // value: integer - scvi_max_epochs // value: integer - mito_genes // value: string - sample_n // value: string - sample_fraction // value: string - qc_only // value: boolean - celldex_reference // value: string - celltypist_model // value: string - unify_gene_symbols // value: boolean - duplicate_var_resolution // value: string - aggregate_isoforms // value: boolean - integration_hvgs // value: integer - integration_methods // value: string - integration_excluded_genes // value: string - scvi_model // value: string - scanvi_model // value: string - scvi_categorical_covariates // value: string - scvi_continuous_covariates // value: string - scimilarity_model // value: string - skip_liana // value: boolean - skip_rankgenesgroups // value: boolean - 
base_embeddings // value: string - base_label_col // value: string - cluster_per_label // value: boolean - cluster_global // value: boolean - clustering_resolutions // value: string - pseudobulk // value: boolean - pseudobulk_groupby_labels // value: string - pseudobulk_min_num_cells // value: integer - prep_cellxgene // value: boolean - outdir // value: string - multiqc_config // value: string - multiqc_logo // value: string - multiqc_methods_description // value: string + is_extension // value: boolean + ch_input // file: samplesheet.csv + ambient_correction // value: string + ambient_corrected_integration // value: boolean + doublet_detection // value: string + doublet_detection_threshold // value: integer + scvi_max_epochs // value: integer + mito_genes // value: string + sample_n // value: string + sample_fraction // value: string + qc_only // value: boolean + celldex_reference // value: string + celltypist_model // value: string + unify_gene_symbols // value: boolean + duplicate_var_resolution // value: string + aggregate_isoforms // value: boolean + integration_hvgs // value: integer + integration_methods // value: string + integration_excluded_genes // value: string + scvi_model // value: string + scanvi_model // value: string + scvi_categorical_covariates // value: string + scvi_continuous_covariates // value: string + scimilarity_model // value: string + skip_liana // value: boolean + skip_rankgenesgroups // value: boolean + base_embeddings // value: string + base_label_col // value: string + cluster_per_label // value: boolean + cluster_global // value: boolean + clustering_resolutions // value: string + pseudobulk // value: boolean + pseudobulk_groupby_labels // value: string + pseudobulk_min_num_cells // value: integer + prep_cellxgene // value: boolean + outdir // value: string + multiqc_config // value: string + multiqc_logo // value: string + multiqc_methods_description // value: string main: @@ -278,6 +279,13 @@ workflow SCDOWNSTREAM { 
prep_cellxgene ) ch_versions = ch_versions.mix(FINALIZE.out.versions) + + // + // MANIFOLD_LEARNING ( + // FINALIZE.out.h5ad, // we use the h5ad from the previous process + // params.manifold_methods // 'phate,diffmap' (defined in nextflow.config) + // ) + // ch_versions = ch_versions.mix(MANIFOLD_LEARNING.out.versions) } //