diff --git a/modules/nf-core/deepmased/features/environment.yml b/modules/nf-core/deepmased/features/environment.yml
new file mode 100644
index 000000000000..b39e8d5e9113
--- /dev/null
+++ b/modules/nf-core/deepmased/features/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  # renovate: datasource=conda depName=bioconda/deepmased
+  - bioconda::deepmased=0.3.1
+  - conda-forge::setuptools=78.1
diff --git a/modules/nf-core/deepmased/features/main.nf b/modules/nf-core/deepmased/features/main.nf
new file mode 100644
index 000000000000..d46ef6181505
--- /dev/null
+++ b/modules/nf-core/deepmased/features/main.nf
@@ -0,0 +1,43 @@
+process DEEPMASED_FEATURES {
+    tag "$meta.id"
+    label 'process_high'
+
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/deepmased:0.3.1--pyh5ca1d4c_0':
+        'quay.io/biocontainers/deepmased:0.3.1--pyh5ca1d4c_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai), path(fasta)
+
+    output:
+    tuple val(meta), path("${prefix}_feature_file_paths.tsv"), path("*_feats.tsv"), emit: features
+    tuple val("${task.process}"), val('deepmased'), val('0.3.1'), topic: versions, emit: versions_deepmased_features
+    // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    echo -e "bam\\tfasta" > ${prefix}_file_paths.tsv
+    echo -e "${bam}\\t${fasta}" >> ${prefix}_file_paths.tsv
+
+    DeepMAsED features \\
+        ${prefix}_file_paths.tsv \\
+        -p ${task.cpus} \\
+        -o . \\
+        -n ${prefix}_feature_file_paths.tsv \\
+        ${args}
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_feature_file_paths.tsv
+    touch ${prefix}_feats.tsv
+    """
+}
diff --git a/modules/nf-core/deepmased/features/meta.yml b/modules/nf-core/deepmased/features/meta.yml
new file mode 100644
index 000000000000..0e2a95cc5fef
--- /dev/null
+++ b/modules/nf-core/deepmased/features/meta.yml
@@ -0,0 +1,88 @@
+name: "deepmased_features"
+description: "DeepMAsED features subcommand: extracts alignment-based features from
+  BAM and assembly FASTA for each contig, producing feature tables used as input for
+  DeepMAsED predict."
+keywords:
+  - metagenomics
+  - assembly
+  - quality control
+  - error detection
+  - deep learning
+  - features
+tools:
+  - "deepmased":
+      description: "Deep learning for Metagenome Assembly Error Detection"
+      homepage: "https://github.com/leylabmpi/DeepMAsED"
+      documentation: "https://github.com/leylabmpi/DeepMAsED"
+      tool_dev_url: "https://github.com/leylabmpi/DeepMAsED"
+      doi: "10.1093/bioinformatics/btaa386"
+      licence:
+        - "MIT"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - bam:
+        type: file
+        description: Sorted BAM file of reads mapped to the assembly
+        pattern: "*.{bam}"
+        ontologies:
+          - edam: "http://edamontology.org/format_2572"
+    - bai:
+        type: file
+        description: BAM index file
+        pattern: "*.{bai}"
+        ontologies: []
+    - fasta:
+        type: file
+        description: Assembly in FASTA format
+        pattern: "*.{fasta,fa,fna}"
+        ontologies:
+          - edam: "http://edamontology.org/format_1929"
+output:
+  features:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - ${prefix}_feature_file_paths.tsv:
+          type: file
+          description: Index file listing all generated feature table files
+          pattern: "*_feature_file_paths.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475"
+      - "*_feats.tsv":
+          type: file
+          description: Per-contig feature tables (one per parallel bin)
+          pattern: "*_feats.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475"
+  versions_deepmased_features:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - deepmased:
+          type: string
+          description: The name of the tool
+      - 0.3.1:
+          type: string
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - deepmased:
+          type: string
+          description: The name of the tool
+      - 0.3.1:
+          type: string
+          description: The expression to obtain the version of the tool
+authors:
+  - "@SkyLexS"
+maintainers:
+  - "@SkyLexS"
diff --git a/modules/nf-core/deepmased/features/tests/main.nf.test b/modules/nf-core/deepmased/features/tests/main.nf.test
new file mode 100644
index 000000000000..624b31d2f143
--- /dev/null
+++ b/modules/nf-core/deepmased/features/tests/main.nf.test
@@ -0,0 +1,66 @@
+// nf-core modules test deepmased/features
+nextflow_process {
+
+    name "Test Process DEEPMASED_FEATURES"
+    script "../main.nf"
+    process "DEEPMASED_FEATURES"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "deepmased"
+    tag "deepmased/features"
+    tag "deepmased_features"
+
+    test("sarscov2 [fasta] - paired-end sorted bam") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert path(process.out.features[0][1]).exists() },
+                { assert path(process.out.features[0][1]).readLines().size() > 1 }
+            )
+        }
+
+    }
+
+    test("sarscov2 [fasta] - paired-end sorted bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/deepmased/features/tests/main.nf.test.snap b/modules/nf-core/deepmased/features/tests/main.nf.test.snap
new file mode 100644
index 000000000000..1bb10f9913e4
--- /dev/null
+++ b/modules/nf-core/deepmased/features/tests/main.nf.test.snap
@@ -0,0 +1,92 @@
+{
+    "sarscov2 [fasta] - paired-end sorted bam": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_feature_file_paths.tsv:md5,4eceace937a13c4318294cee72cb4320",
+                        "test.paired_end.sorted_feats.tsv:md5,81bc5e8ac16d47ea7fde1c07a92fbba3"
+                    ]
+                ],
+                "1": [
+                    [
+                        "DEEPMASED_FEATURES",
+                        "deepmased",
+                        "0.3.1"
+                    ]
+                ],
+                "features": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_feature_file_paths.tsv:md5,4eceace937a13c4318294cee72cb4320",
+                        "test.paired_end.sorted_feats.tsv:md5,81bc5e8ac16d47ea7fde1c07a92fbba3"
+                    ]
+                ],
+                "versions_deepmased_features": [
+                    [
+                        "DEEPMASED_FEATURES",
+                        "deepmased",
+                        "0.3.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-05-13T09:59:49.966230332"
+    },
+    "sarscov2 [fasta] - paired-end sorted bam - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_feature_file_paths.tsv:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test_feats.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        "DEEPMASED_FEATURES",
+                        "deepmased",
+                        "0.3.1"
+                    ]
+                ],
+                "features": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_feature_file_paths.tsv:md5,d41d8cd98f00b204e9800998ecf8427e",
+                        "test_feats.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_deepmased_features": [
+                    [
+                        "DEEPMASED_FEATURES",
+                        "deepmased",
+                        "0.3.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-05-13T10:04:26.105533827"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/deepmased/predict/environment.yml b/modules/nf-core/deepmased/predict/environment.yml
new file mode 100644
index 000000000000..b39e8d5e9113
--- /dev/null
+++ b/modules/nf-core/deepmased/predict/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  # renovate: datasource=conda depName=bioconda/deepmased
+  - bioconda::deepmased=0.3.1
+  - conda-forge::setuptools=78.1
diff --git a/modules/nf-core/deepmased/predict/main.nf b/modules/nf-core/deepmased/predict/main.nf
new file mode 100644
index 000000000000..a676cc317a3c
--- /dev/null
+++ b/modules/nf-core/deepmased/predict/main.nf
@@ -0,0 +1,38 @@
+process DEEPMASED_PREDICT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/deepmased:0.3.1--pyh5ca1d4c_0':
+        'quay.io/biocontainers/deepmased:0.3.1--pyh5ca1d4c_0' }"
+
+    input:
+    tuple val(meta), path(feature_file_table), path(feature_files)
+
+    output:
+    tuple val(meta), path("*_deepmased_predictions.tsv"), emit: predictions
+    tuple val("${task.process}"), val('deepmased'), val('0.3.1'), topic: versions, emit: versions_deepmased_predict
+    // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '--cpu-only'
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    DeepMAsED predict \\
+        ${feature_file_table} \\
+        --n-procs ${task.cpus} \\
+        --save-name ${prefix}_deepmased \\
+        ${args}
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_deepmased_predictions.tsv
+    """
+}
diff --git a/modules/nf-core/deepmased/predict/meta.yml b/modules/nf-core/deepmased/predict/meta.yml
new file mode 100644
index 000000000000..d70260271298
--- /dev/null
+++ b/modules/nf-core/deepmased/predict/meta.yml
@@ -0,0 +1,79 @@
+name: "deepmased_predict"
+description: "DeepMAsED predict subcommand: runs the pre-trained deep learning model
+  on feature tables produced by DeepMAsED features to predict per-contig assembly
+  error scores."
+keywords:
+  - metagenomics
+  - assembly
+  - quality control
+  - error detection
+  - deep learning
+  - prediction
+tools:
+  - "deepmased":
+      description: "Deep learning for Metagenome Assembly Error Detection"
+      homepage: "https://github.com/leylabmpi/DeepMAsED"
+      documentation: "https://github.com/leylabmpi/DeepMAsED"
+      tool_dev_url: "https://github.com/leylabmpi/DeepMAsED"
+      doi: "10.1093/bioinformatics/btaa386"
+      licence:
+        - "MIT"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - feature_file_table:
+        type: file
+        description: Index TSV file listing all feature table files (output of
+          deepmased/features)
+        pattern: "*_feature_file_paths.tsv"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475"
+    - feature_files:
+        type: file
+        description: Per-contig feature table files (output of deepmased/features)
+        pattern: "*_feats.tsv"
+        ontologies:
+          - edam: "http://edamontology.org/format_3475"
+output:
+  predictions:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*_deepmased_predictions.tsv":
+          type: file
+          description: TSV file containing per-contig assembly error predictions
+            (score 0=correct, 1=misassembly)
+          pattern: "*_deepmased_predictions.tsv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3475"
+  versions_deepmased_predict:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - deepmased:
+          type: string
+          description: The name of the tool
+      - 0.3.1:
+          type: string
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - deepmased:
+          type: string
+          description: The name of the tool
+      - 0.3.1:
+          type: string
+          description: The expression to obtain the version of the tool
+authors:
+  - "@SkyLexS"
+maintainers:
+  - "@SkyLexS"
diff --git a/modules/nf-core/deepmased/predict/tests/main.nf.test b/modules/nf-core/deepmased/predict/tests/main.nf.test
new file mode 100644
index 000000000000..88585e9e7563
--- /dev/null
+++ b/modules/nf-core/deepmased/predict/tests/main.nf.test
@@ -0,0 +1,46 @@
+// nf-core modules test deepmased/predict
+nextflow_process {
+
+    name "Test Process DEEPMASED_PREDICT"
+    script "../main.nf"
+    process "DEEPMASED_PREDICT"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "deepmased"
+    tag "deepmased/features"
+    tag "deepmased/predict"
+    tag "deepmased_predict"
+
+    // NOTE: DeepMAsED is designed for metagenome assemblies with sufficient read
+    // coverage per contig. The standard sarscov2 test data results in 0 contigs
+    // after coverage filtering, causing predict to crash (IndexError).
+    // A full integration test requires metagenome BAM + assembly data.
+    // The stub test below verifies module structure and output file naming.
+
+    test("sarscov2 [fasta] - paired-end sorted bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), // dummy feature_file_table
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), // dummy feature_files
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/deepmased/predict/tests/main.nf.test.snap b/modules/nf-core/deepmased/predict/tests/main.nf.test.snap
new file mode 100644
index 000000000000..986fd80711db
--- /dev/null
+++ b/modules/nf-core/deepmased/predict/tests/main.nf.test.snap
@@ -0,0 +1,45 @@
+{
+    "sarscov2 [fasta] - paired-end sorted bam - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_deepmased_predictions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        "DEEPMASED_PREDICT",
+                        "deepmased",
+                        "0.3.1"
+                    ]
+                ],
+                "predictions": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test_deepmased_predictions.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_deepmased_predict": [
+                    [
+                        "DEEPMASED_PREDICT",
+                        "deepmased",
+                        "0.3.1"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.3"
+        },
+        "timestamp": "2026-05-13T10:04:37.332248142"
+    }
+}
\ No newline at end of file