From 6851bf644257eb51e33514e68caedfe7fb6d0d3a Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Fri, 20 Mar 2026 16:56:31 +0000 Subject: [PATCH 1/5] masurca started --- modules/nf-core/masurca/environment.yml | 10 + modules/nf-core/masurca/main.nf | 176 ++++++++++ modules/nf-core/masurca/meta.yml | 77 +++++ modules/nf-core/masurca/tests/main.nf.test | 309 ++++++++++++++++++ .../nf-core/masurca/tests/main.nf.test.snap | 160 +++++++++ 5 files changed, 732 insertions(+) create mode 100644 modules/nf-core/masurca/environment.yml create mode 100644 modules/nf-core/masurca/main.nf create mode 100644 modules/nf-core/masurca/meta.yml create mode 100644 modules/nf-core/masurca/tests/main.nf.test create mode 100644 modules/nf-core/masurca/tests/main.nf.test.snap diff --git a/modules/nf-core/masurca/environment.yml b/modules/nf-core/masurca/environment.yml new file mode 100644 index 000000000000..c949d090cb48 --- /dev/null +++ b/modules/nf-core/masurca/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # TODO nf-core: List required Conda package(s). + # Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + # For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + - "bioconda::masurca=4.1.4" diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf new file mode 100644 index 000000000000..b34ea3eeb8b7 --- /dev/null +++ b/modules/nf-core/masurca/main.nf @@ -0,0 +1,176 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules/nf-core/ +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process MASURCA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cf/cf6402ed20c3b089ab88cd8884ddace90693501453a515f9188ae681e8ca8556/data': + 'community.wave.seqera.io/library/masurca:4.1.4--d05ef74c4881d55c' }" + + input: + tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore), path(other_reads), path(reference_genome) + val fragment_mean + val fragment_stdev + val jump_mean + val jump_stdev + + + output: + tuple val(meta), path("${prefix}/assemble.sh") , emit: script + tuple val(meta), path("${prefix}/CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds + tuple val(meta), path("${prefix}/CA*/final.genome.ctg.fasta"), optional: true, emit: contigs + tuple val(meta), path("${prefix}/flye/assembly.fasta") , optional: true, emit: flye_assembly + tuple val(meta), path("${prefix}/*_masurca_config.txt") , emit: config + tuple val("${task.process}"), val('masurca'), eval("masurca --version | sed 's/version //g'"), topic: versions, emit: versions_masurca + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional + def illumina_reads = meta.single_end ? "\$(readlink -f ${illumina})" : "\$(readlink -f ${illumina[0]}) \$(readlink -f ${illumina[1]})" + def jump_reads = jump ? "\$(readlink -f ${jump[0]}) \$(readlink -f ${jump[1]})" : "" + def pacbio_file = pacbio ? "\$(readlink -f ${pacbio})" : "" + def nanopore_file = nanopore ? "\$(readlink -f ${nanopore})" : "" + def other_reads_file = other_reads ? "\$(readlink -f ${other_reads})" : "" + def reference_genome_file = reference_genome ? "\$(readlink -f ${reference_genome})" : "" + + // Configuration parameters with defaults from task.ext + def extend_jump_reads = task.ext.extend_jump_reads != null ? task.ext.extend_jump_reads : 0 + def graph_kmer_size = task.ext.graph_kmer_size ?: 'auto' + def use_linking_mates = task.ext.use_linking_mates != null ? task.ext.use_linking_mates : 0 + def lhe_coverage = task.ext.lhe_coverage ?: 25 + def mega_reads_one_pass = task.ext.mega_reads_one_pass != null ? task.ext.mega_reads_one_pass : 0 + def limit_jump_coverage = task.ext.limit_jump_coverage ?: 300 + def ca_parameters = task.ext.ca_parameters ?: 'cgwErrorRate=0.15' + def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 1 + def jf_size = task.ext.jf_size ?: 200000000 + def soap_assembly = task.ext.soap_assembly != null ? task.ext.soap_assembly : 0 + def flye_assembly = task.ext.flye_assembly != null ? task.ext.flye_assembly : 0 + """ + echo "DATA" > ${prefix}_masurca_config.txt + echo "#Illumina paired end reads supplied as " >> ${prefix}_masurca_config.txt + echo "#if single-end, do not specify " >> ${prefix}_masurca_config.txt + echo "#MUST HAVE Illumina paired end reads to use MaSuRCA" >> ${prefix}_masurca_config.txt + echo "PE= pe ${fragment_mean} ${fragment_stdev} ${illumina_reads}" >> ${prefix}_masurca_config.txt + + # Jump/mate pair reads (optional) + if [ -n "${jump_reads}" ]; then + echo "#Illumina mate pair reads supplied as " >> ${prefix}_masurca_config.txt + echo "JUMP= sh ${jump_mean} ${jump_stdev} ${jump_reads}" >> ${prefix}_masurca_config.txt + fi + + # PacBio and Nanopore reads handling + # If both exist, concatenate them and supply as NANOPORE (per MaSuRCA docs) + if [ -n "${pacbio_file}" ] && [ -n "${nanopore_file}" ]; then + echo "#if you have both PacBio and Nanopore, supply both as NANOPORE type" >> ${prefix}_masurca_config.txt + cat ${pacbio_file} ${nanopore_file} > ${prefix}_long_reads.fastq.gz + echo "NANOPORE=\$(readlink -f ${prefix}_long_reads.fastq.gz)" >> ${prefix}_masurca_config.txt + elif [ -n "${pacbio_file}" ]; then + echo "#PacBio/CCS reads must be in a single fasta or fastq file with absolute path" >> ${prefix}_masurca_config.txt + echo "PACBIO=${pacbio_file}" >> ${prefix}_masurca_config.txt + elif [ -n "${nanopore_file}" ]; then + echo "#Nanopore reads must be in a single fasta or fastq file with absolute path" >> ${prefix}_masurca_config.txt + echo "NANOPORE=${nanopore_file}" >> ${prefix}_masurca_config.txt + fi + + # Other reads (optional) - Sanger, 454, etc. + if [ -n "${other_reads_file}" ]; then + echo "#Other reads (Sanger, 454, etc) one frg file, concatenate your frg files into one if you have many" >> ${prefix}_masurca_config.txt + echo "OTHER=${other_reads_file}" >> ${prefix}_masurca_config.txt + fi + + # Reference genome (optional) - for synteny-assisted assembly + if [ -n "${reference_genome_file}" ]; then + echo "#synteny-assisted assembly, concatenate all reference genomes into one reference.fa; works for Illumina-only data" >> ${prefix}_masurca_config.txt + echo "REFERENCE=${reference_genome_file}" >> ${prefix}_masurca_config.txt + fi + + echo "END" >> ${prefix}_masurca_config.txt + + + echo "" >> ${prefix}_masurca_config.txt + echo "PARAMETERS" >> ${prefix}_masurca_config.txt + echo "#set this to 1 if your Illumina jumping library reads are shorter than 100bp" >> ${prefix}_masurca_config.txt + echo "EXTEND_JUMP_READS=${extend_jump_reads}" >> ${prefix}_masurca_config.txt + echo "#this is k-mer size for deBruijn graph values between 25 and 127 are supported, auto will compute the optimal size based on the read data and GC content" >> ${prefix}_masurca_config.txt + echo "GRAPH_KMER_SIZE = ${graph_kmer_size}" >> ${prefix}_masurca_config.txt + echo "#set this to 1 for all Illumina-only assemblies" >> ${prefix}_masurca_config.txt + echo "#set this to 0 if you have more than 15x coverage by long reads (Pacbio or Nanopore) or any other long reads/mate pairs (Illumina MP, Sanger, 454, etc)" >> ${prefix}_masurca_config.txt + echo "USE_LINKING_MATES = ${use_linking_mates}" >> ${prefix}_masurca_config.txt + echo "#use at most this much coverage by the longest Pacbio or Nanopore reads, discard the rest of the reads" >> ${prefix}_masurca_config.txt + echo "#can increase this to 30 or 35 if your reads are short (N50<7000bp)" >> ${prefix}_masurca_config.txt + echo "LHE_COVERAGE=${lhe_coverage}" >> ${prefix}_masurca_config.txt + echo "#set to 0 (default) to do two passes of mega-reads for slower, but higher quality assembly, otherwise set to 1" >> ${prefix}_masurca_config.txt + echo "MEGA_READS_ONE_PASS=${mega_reads_one_pass}" >> ${prefix}_masurca_config.txt + echo "#this parameter is useful if you have too many Illumina jumping library mates. Typically set it to 60 for bacteria and 300 for the other organisms" >> ${prefix}_masurca_config.txt + echo "LIMIT_JUMP_COVERAGE = ${limit_jump_coverage}" >> ${prefix}_masurca_config.txt + echo "#these are the additional parameters to Celera Assembler. do not worry about performance, number or processors or batch sizes -- these are computed automatically." >> ${prefix}_masurca_config.txt + echo "#CABOG ASSEMBLY ONLY: set cgwErrorRate=0.25 for bacteria and 0.1<=cgwErrorRate<=0.15 for other organisms." >> ${prefix}_masurca_config.txt + echo "CA_PARAMETERS = ${ca_parameters}" >> ${prefix}_masurca_config.txt + echo "#CABOG ASSEMBLY ONLY: whether to attempt to close gaps in scaffolds with Illumina or long read data" >> ${prefix}_masurca_config.txt + echo "CLOSE_GAPS=${close_gaps}" >> ${prefix}_masurca_config.txt + echo "#number of cpus to use, set this to the number of CPUs/threads per node you will be using" >> ${prefix}_masurca_config.txt + echo "NUM_THREADS = ${task.cpus}" >> ${prefix}_masurca_config.txt + echo "#this is mandatory jellyfish hash size -- a safe value is estimated_genome_size*20" >> ${prefix}_masurca_config.txt + echo "JF_SIZE = ${jf_size}" >> ${prefix}_masurca_config.txt + echo "#ILLUMINA ONLY. Set this to 1 to use SOAPdenovo contigging/scaffolding module." >> ${prefix}_masurca_config.txt + echo "#Assembly will be worse but will run faster. Useful for very large (>=8Gbp) genomes from Illumina-only data" >> ${prefix}_masurca_config.txt + echo "SOAP_ASSEMBLY=${soap_assembly}" >> ${prefix}_masurca_config.txt + echo "#If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files)." >> ${prefix}_masurca_config.txt + echo "#Set this to 1 to use Flye assembler for final assembly of corrected mega-reads." >> ${prefix}_masurca_config.txt + echo "#A lot faster than CABOG, AND QUALITY IS THE SAME OR BETTER." >> ${prefix}_masurca_config.txt + echo "#Works well even when MEGA_READS_ONE_PASS is set to 1." >> ${prefix}_masurca_config.txt + echo "#DO NOT use if you have less than 15x coverage by long reads." >> ${prefix}_masurca_config.txt + echo "FLYE_ASSEMBLY=${flye_assembly}" >> ${prefix}_masurca_config.txt + echo "END" >> ${prefix}_masurca_config.txt + + # Generate assembly script + masurca ${prefix}_masurca_config.txt + + # Create output directory and move files + mkdir -p ${prefix} + mv assemble.sh ${prefix}/ + mv ${prefix}_masurca_config.txt ${prefix}/ + chmod +x ${prefix}/assemble.sh + + # Run the assembly + cd ${prefix} + ./assemble.sh + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo $args + + mkdir -p ${prefix}/CA + mkdir -p ${prefix}/flye + touch ${prefix}/assemble.sh + touch ${prefix}/${prefix}_masurca_config.txt + touch ${prefix}/CA/final.genome.scf.fasta + touch ${prefix}/CA/final.genome.ctg.fasta + touch ${prefix}/flye/assembly.fasta + """ +} diff --git a/modules/nf-core/masurca/meta.yml b/modules/nf-core/masurca/meta.yml new file mode 100644 index 000000000000..45a72409d119 --- /dev/null +++ b/modules/nf-core/masurca/meta.yml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +# # TODO nf-core: Add a description of the module and list keywords +name: "masurca" +description: write your description here +keywords: + - sort + - example + - genomics +tools: + ## TODO nf-core: Add a description and other details for the software below + - "masurca": + description: "MaSuRCA (Maryland Super-Read Celera Assembler) genome assembly software." + homepage: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" + documentation: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" + tool_dev_url: "https://github.com/alekseyzimin/masurca" + doi: "" + licence: ["GPL v3"] + identifier: biotools:masurca + +input: + ### TODO nf-core: Add a description of all of the variables used as input + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + +output: + ### TODO nf-core: Add a description of all of the variables used as output + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*.bam": + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + - edam: "http://edamontology.org/format_2573" # CRAM + - edam: "http://edamontology.org/format_3462" # SAM + versions_masurca: + - - "${task.process}": + type: string + description: The name of the process + - "masurca": + type: string + description: The name of the tool + - "masurca --version": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - masurca: + type: string + description: The name of the tool + - masurca --version: + type: eval + description: The expression to obtain the version of the tool +authors: + - "@LiaOb21" +maintainers: + - "@LiaOb21" diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test new file mode 100644 index 000000000000..d6719d65029b --- /dev/null +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -0,0 +1,309 @@ +nextflow_process { + + name "Test Process MASURCA" + script "../main.nf" + process "MASURCA" + + tag "modules" + tag "modules_nfcore" + tag "masurca" + + test("sarscov2 - illumina - single_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - illumina - paired_end") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - illumina - paired_end - with_jump") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 3600 // jump_mean + input[4] = 200 // jump_stdev + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - hybrid - illumina_pacbio") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - hybrid - illumina_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - hybrid - illumina_pacbio_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - illumina - with_reference") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.script, + process.out.config, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - illumina - paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap new file mode 100644 index 000000000000..daf30de492c0 --- /dev/null +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "sarscov2 - illumina - paired_end - with_jump": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:33.598889584" + }, + "sarscov2 - hybrid - illumina_nanopore": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:55.13953957" + }, + "sarscov2 - illumina - single_end": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "config": [ + + ], + "contigs": [ + + ], + "flye_assembly": [ + + ], + "scaffolds": [ + + ], + "script": [ + + ], + "versions_masurca": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:14.230197251" + }, + "sarscov2 - illumina - paired_end": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:53:21.827256455" + }, + "sarscov2 - illumina - paired_end - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "config": [ + + ], + "contigs": [ + + ], + "flye_assembly": [ + + ], + "scaffolds": [ + + ], + "script": [ + + ], + "versions_masurca": [ + + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:54:17.392502942" + }, + "sarscov2 - illumina - with_reference": { + "content": [ + [ + + ], + [ + + ], + null + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-03-20T16:54:09.994377364" + } +} \ No newline at end of file From 40c1b3baf47b949f91d824b62263026093072294 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Mon, 23 Mar 2026 17:30:08 +0000 Subject: [PATCH 2/5] still working on it --- modules/nf-core/masurca/main.nf | 19 +- modules/nf-core/masurca/tests/main.nf.test | 345 +++++++++--------- .../nf-core/masurca/tests/main.nf.test.snap | 95 +++-- 3 files changed, 253 insertions(+), 206 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index b34ea3eeb8b7..d3887a32fb7c 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -33,11 +33,12 @@ process MASURCA { output: - tuple val(meta), path("${prefix}/assemble.sh") , emit: script - tuple val(meta), path("${prefix}/CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds - tuple val(meta), path("${prefix}/CA*/final.genome.ctg.fasta"), optional: true, emit: contigs - tuple val(meta), path("${prefix}/flye/assembly.fasta") , optional: true, emit: flye_assembly - tuple val(meta), path("${prefix}/*_masurca_config.txt") , emit: config + tuple val(meta), path("assemble.sh") , emit: script + tuple val(meta), path("CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds + tuple val(meta), path("CA*/final.genome.ctg.fasta"), optional: true, emit: contigs + tuple val(meta), path("flye/assembly.fasta") , optional: true, emit: flye_assembly + tuple val(meta), path("*_masurca_config.txt") , emit: config + tuple val(meta), path("*-masurca.log") , emit: log tuple val("${task.process}"), val('masurca'), eval("masurca --version | sed 's/version //g'"), topic: versions, emit: versions_masurca when: @@ -48,7 +49,7 @@ process MASURCA { def prefix = task.ext.prefix ?: "${meta.id}" //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional - def illumina_reads = meta.single_end ? "\$(readlink -f ${illumina})" : "\$(readlink -f ${illumina[0]}) \$(readlink -f ${illumina[1]})" + def illumina_reads = [illumina].flatten().join(' ') def jump_reads = jump ? "\$(readlink -f ${jump[0]}) \$(readlink -f ${jump[1]})" : "" def pacbio_file = pacbio ? "\$(readlink -f ${pacbio})" : "" def nanopore_file = nanopore ? "\$(readlink -f ${nanopore})" : "" @@ -156,15 +157,13 @@ process MASURCA { # Run the assembly cd ${prefix} - ./assemble.sh + ./assemble.sh > ${prefix}-masurca.log 2>&1 """ stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - """ - echo $args - + """ mkdir -p ${prefix}/CA mkdir -p ${prefix}/flye touch ${prefix}/assemble.sh diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index d6719d65029b..a892883025c0 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz", checkIfExists: true) [], // no jump reads [], // no pacbio [], // no nanopore @@ -30,133 +30,22 @@ nextflow_process { } } - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2 - illumina - paired_end") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no jump reads - [], // no pacbio - [], // no nanopore - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 0 // jump_mean (not used) - input[4] = 0 // jump_stdev (not used) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.script, - process.out.config, - process.out.versions - ).match() } - ) - } - - } - - test("sarscov2 - illumina - paired_end - with_jump") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no pacbio - [], // no nanopore - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 3600 // jump_mean - input[4] = 200 // jump_stdev - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.script, - process.out.config, - process.out.versions - ).match() } - ) - } - - } - - test("sarscov2 - hybrid - illumina_pacbio") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no jump reads - file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), - [], // no nanopore - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 0 // jump_mean (not used) - input[4] = 0 // jump_stdev (not used) - """ - } - } - then { assertAll( { assert process.success }, { assert snapshot( + process.out.contigs, + process.out.scaffolds, process.out.script, - process.out.config, - process.out.versions + process.out.log, + process.out.versions_masurca ).match() } ) } } - test("sarscov2 - hybrid - illumina_nanopore") { + test("sarscov2 - illumina - paired_end") { when { process { @@ -164,12 +53,12 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], [], // no jump reads [], // no pacbio - file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + [], // no nanopore [], // no other reads [] // no reference genome ] @@ -185,52 +74,175 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( + process.out.contigs, + process.out.scaffolds, process.out.script, - process.out.config, - process.out.versions + process.out.log, + process.out.versions_masurca ).match() } ) } } - test("sarscov2 - hybrid - illumina_pacbio_nanopore") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) - ], - [], // no jump reads - file(params.test_data['sarscov2']['pacbio']['test_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), - [], // no other reads - [] // no reference genome - ] - input[1] = 500 // fragment_mean - input[2] = 50 // fragment_stdev - input[3] = 0 // jump_mean (not used) - input[4] = 0 // jump_stdev (not used) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.script, - process.out.config, - process.out.versions - ).match() } - ) - } - - } +// test("sarscov2 - illumina - paired_end - with_jump") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [ +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) +// ], +// [], // no pacbio +// [], // no nanopore +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 3600 // jump_mean +// input[4] = 200 // jump_stdev +// """ +// } +// } +// +// then { +// assertAll( +// { assert process.success }, +// { assert snapshot( +// process.out.contigs, +// process.out.scaffolds, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } +// +// test("sarscov2 - hybrid - illumina_pacbio") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz", checkIfExists: true), +// [], // no nanopore +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } +// +// then { +// assertAll( +// { assert process.success }, +// { assert snapshot( +// process.out.flye_assembly, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } +// +// test("sarscov2 - hybrid - illumina_nanopore") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// [], // no pacbio +// file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true), +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } +// +// then { +// assertAll( +// { assert process.success }, +// { assert snapshot( +// process.out.flye_assembly, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } +// +// test("homo_sapiens - hybrid - illumina_pacbio_nanopore") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test', single_end:false ], +// [ +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), +// [], // no other reads +// [] // no reference genome +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } +// +// then { +// assertAll( +// { assert snapshot( +// process.out.flye_assembly, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } test("sarscov2 - illumina - with_reference") { @@ -240,14 +252,14 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], [], // no jump reads [], // no pacbio [], // no nanopore [], // no other reads - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true) ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -259,11 +271,12 @@ nextflow_process { then { assertAll( - { assert process.success }, { assert snapshot( + process.out.contigs, + process.out.scaffolds, process.out.script, - process.out.config, - process.out.versions + process.out.log, + process.out.versions_masurca ).match() } ) } @@ -300,7 +313,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index daf30de492c0..ad490f7042ca 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -7,13 +7,21 @@ [ ], - null + [ + + ], + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" }, - "timestamp": "2026-03-20T16:53:33.598889584" + "timestamp": "2026-03-23T16:33:30.420661493" }, "sarscov2 - hybrid - illumina_nanopore": { "content": [ @@ -23,13 +31,18 @@ [ ], - null + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T16:53:55.13953957" + "timestamp": "2026-03-23T16:39:05.105234025" }, "sarscov2 - illumina - single_end": { "content": [ @@ -86,35 +99,46 @@ [ ], - null + [ + + ], + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" }, - "timestamp": "2026-03-20T16:53:21.827256455" + "timestamp": "2026-03-23T16:33:21.946468822" + }, + "sarscov2 - hybrid - illumina_pacbio": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-03-23T16:38:57.588039397" }, "sarscov2 - illumina - paired_end - stub": { "content": [ { - "0": [ - - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - - ], - "4": [ - - ], - "5": [ - - ], "config": [ ], @@ -123,6 +147,9 @@ ], "flye_assembly": [ + ], + "log": [ + ], "scaffolds": [ @@ -137,9 +164,9 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T16:54:17.392502942" + "timestamp": "2026-03-23T16:39:42.421134682" }, "sarscov2 - illumina - with_reference": { "content": [ @@ -149,12 +176,20 @@ [ ], - null + [ + + ], + [ + + ], + [ + + ] ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "25.10.4" }, - "timestamp": "2026-03-20T16:54:09.994377364" + "timestamp": "2026-03-23T16:39:33.587772541" } } \ No newline at end of file From 5c5630906da5d3e67ad97bdad3c4afc38065bf01 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Tue, 24 Mar 2026 23:11:28 +0000 Subject: [PATCH 3/5] probably close --- .vscode/extensions.json | 3 - .vscode/settings.json | 8 - modules/nf-core/masurca/main.nf | 46 +-- modules/nf-core/masurca/tests/main.nf.test | 345 +++++++++--------- .../nf-core/masurca/tests/main.nf.test.snap | 337 ++++++++++++++--- 5 files changed, 475 insertions(+), 264 deletions(-) delete mode 100644 .vscode/extensions.json delete mode 100644 .vscode/settings.json diff --git a/.vscode/extensions.json b/.vscode/extensions.json deleted file mode 100644 index 110de1157123..000000000000 --- a/.vscode/extensions.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "recommendations": ["nextflow.nextflow", "nf-core.nf-core-extensionpack"] -} diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index a2676d66e969..000000000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "nextflow.formatting.harshilAlignment": true, - "yaml.schemas": { - "./modules/meta-schema.json": ["modules/nf-core/**/meta.yml"], - "./modules/environment-schema.json": ["modules/nf-core/**/environment.yml"], - "./subworkflows/yaml-schema.json": ["subworkflows/nf-core/**/meta.yml"] - } -} diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index d3887a32fb7c..7c1fa4245abd 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -34,9 +34,7 @@ process MASURCA { output: tuple val(meta), path("assemble.sh") , emit: script - tuple val(meta), path("CA*/final.genome.scf.fasta"), optional: true, emit: scaffolds - tuple val(meta), path("CA*/final.genome.ctg.fasta"), optional: true, emit: contigs - tuple val(meta), path("flye/assembly.fasta") , optional: true, emit: flye_assembly + tuple val(meta), path("CA*/primary.genome.scf.fasta") , emit: scaffolds tuple val(meta), path("*_masurca_config.txt") , emit: config tuple val(meta), path("*-masurca.log") , emit: log tuple val("${task.process}"), val('masurca'), eval("masurca --version | sed 's/version //g'"), topic: versions, emit: versions_masurca @@ -49,12 +47,12 @@ process MASURCA { def prefix = task.ext.prefix ?: "${meta.id}" //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional - def illumina_reads = [illumina].flatten().join(' ') - def jump_reads = jump ? "\$(readlink -f ${jump[0]}) \$(readlink -f ${jump[1]})" : "" - def pacbio_file = pacbio ? "\$(readlink -f ${pacbio})" : "" - def nanopore_file = nanopore ? "\$(readlink -f ${nanopore})" : "" - def other_reads_file = other_reads ? "\$(readlink -f ${other_reads})" : "" - def reference_genome_file = reference_genome ? "\$(readlink -f ${reference_genome})" : "" + def illumina_reads = illumina.collect { it.toRealPath() }.join(' ') + def jump_reads = jump ? jump.collect { it.toRealPath() }.join(' ') : "" + def pacbio_file = pacbio ? pacbio.toRealPath() : "" + def nanopore_file = nanopore ? nanopore.toRealPath() : "" + def other_reads_file = other_reads ? other_reads.toRealPath() : "" + def reference_genome_file = reference_genome ? reference_genome.toRealPath() : "" // Configuration parameters with defaults from task.ext def extend_jump_reads = task.ext.extend_jump_reads != null ? task.ext.extend_jump_reads : 0 @@ -64,10 +62,8 @@ process MASURCA { def mega_reads_one_pass = task.ext.mega_reads_one_pass != null ? task.ext.mega_reads_one_pass : 0 def limit_jump_coverage = task.ext.limit_jump_coverage ?: 300 def ca_parameters = task.ext.ca_parameters ?: 'cgwErrorRate=0.15' - def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 1 + def close_gaps = task.ext.close_gaps != null ? task.ext.close_gaps : 0 def jf_size = task.ext.jf_size ?: 200000000 - def soap_assembly = task.ext.soap_assembly != null ? task.ext.soap_assembly : 0 - def flye_assembly = task.ext.flye_assembly != null ? task.ext.flye_assembly : 0 """ echo "DATA" > ${prefix}_masurca_config.txt echo "#Illumina paired end reads supplied as " >> ${prefix}_masurca_config.txt @@ -86,7 +82,7 @@ process MASURCA { if [ -n "${pacbio_file}" ] && [ -n "${nanopore_file}" ]; then echo "#if you have both PacBio and Nanopore, supply both as NANOPORE type" >> ${prefix}_masurca_config.txt cat ${pacbio_file} ${nanopore_file} > ${prefix}_long_reads.fastq.gz - echo "NANOPORE=\$(readlink -f ${prefix}_long_reads.fastq.gz)" >> ${prefix}_masurca_config.txt + echo "NANOPORE= ${prefix}_long_reads.fastq.gz" >> ${prefix}_masurca_config.txt elif [ -n "${pacbio_file}" ]; then echo "#PacBio/CCS reads must be in a single fasta or fastq file with absolute path" >> ${prefix}_masurca_config.txt echo "PACBIO=${pacbio_file}" >> ${prefix}_masurca_config.txt @@ -137,26 +133,18 @@ process MASURCA { echo "JF_SIZE = ${jf_size}" >> ${prefix}_masurca_config.txt echo "#ILLUMINA ONLY. Set this to 1 to use SOAPdenovo contigging/scaffolding module." >> ${prefix}_masurca_config.txt echo "#Assembly will be worse but will run faster. Useful for very large (>=8Gbp) genomes from Illumina-only data" >> ${prefix}_masurca_config.txt - echo "SOAP_ASSEMBLY=${soap_assembly}" >> ${prefix}_masurca_config.txt + echo "SOAP_ASSEMBLY=0" >> ${prefix}_masurca_config.txt echo "#If you are doing Hybrid Illumina paired end + Nanopore/PacBio assembly ONLY (no Illumina mate pairs or OTHER frg files)." >> ${prefix}_masurca_config.txt echo "#Set this to 1 to use Flye assembler for final assembly of corrected mega-reads." >> ${prefix}_masurca_config.txt echo "#A lot faster than CABOG, AND QUALITY IS THE SAME OR BETTER." >> ${prefix}_masurca_config.txt echo "#Works well even when MEGA_READS_ONE_PASS is set to 1." >> ${prefix}_masurca_config.txt echo "#DO NOT use if you have less than 15x coverage by long reads." >> ${prefix}_masurca_config.txt - echo "FLYE_ASSEMBLY=${flye_assembly}" >> ${prefix}_masurca_config.txt + echo "FLYE_ASSEMBLY=0" >> ${prefix}_masurca_config.txt echo "END" >> ${prefix}_masurca_config.txt # Generate assembly script masurca ${prefix}_masurca_config.txt - # Create output directory and move files - mkdir -p ${prefix} - mv assemble.sh ${prefix}/ - mv ${prefix}_masurca_config.txt ${prefix}/ - chmod +x ${prefix}/assemble.sh - - # Run the assembly - cd ${prefix} ./assemble.sh > ${prefix}-masurca.log 2>&1 """ @@ -164,12 +152,10 @@ process MASURCA { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir -p ${prefix}/CA - mkdir -p ${prefix}/flye - touch ${prefix}/assemble.sh - touch ${prefix}/${prefix}_masurca_config.txt - touch ${prefix}/CA/final.genome.scf.fasta - touch ${prefix}/CA/final.genome.ctg.fasta - touch ${prefix}/flye/assembly.fasta + mkdir -p CA + touch assemble.sh + touch ${prefix}_masurca_config.txt + touch CA/primary.genome.scf.fasta + touch ${prefix}-masurca.log """ } diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index a892883025c0..1d6a1afe517d 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -8,14 +8,14 @@ nextflow_process { tag "modules_nfcore" tag "masurca" - test("sarscov2 - illumina - single_end") { + test("homo_sapiens - illumina - single_end") { when { process { """ input[0] = [ - [ id:'test', single_end:true ], - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz", checkIfExists: true) + [ id:'test'], + [file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true)], [], // no jump reads [], // no pacbio [], // no nanopore @@ -34,7 +34,6 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.contigs, process.out.scaffolds, process.out.script, process.out.log, @@ -45,16 +44,16 @@ nextflow_process { } - test("sarscov2 - illumina - paired_end") { + test("homo_sapiens - illumina - paired_end") { when { process { """ input[0] = [ - [ id:'test', single_end:false ], + [ id:'test'], [ - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), - file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], [], // no jump reads [], // no pacbio @@ -74,7 +73,6 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.contigs, process.out.scaffolds, process.out.script, process.out.log, @@ -85,181 +83,142 @@ nextflow_process { } -// test("sarscov2 - illumina - paired_end - with_jump") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [ -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) -// ], -// [], // no pacbio -// [], // no nanopore -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 3600 // jump_mean -// input[4] = 200 // jump_stdev -// """ -// } -// } -// -// then { -// assertAll( -// { assert process.success }, -// { assert snapshot( -// process.out.contigs, -// process.out.scaffolds, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } -// -// test("sarscov2 - hybrid - illumina_pacbio") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz", checkIfExists: true), -// [], // no nanopore -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } -// -// then { -// assertAll( -// { assert process.success }, -// { assert snapshot( -// process.out.flye_assembly, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } -// -// test("sarscov2 - hybrid - illumina_nanopore") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// [], // no pacbio -// file(params.modules_testdata_base_path + "genomics/sarscov2/nanopore/fastq/test.fastq.gz", checkIfExists: true), -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } -// -// then { -// assertAll( -// { assert process.success }, -// { assert snapshot( -// process.out.flye_assembly, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } -// -// test("homo_sapiens - hybrid - illumina_pacbio_nanopore") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test', single_end:false ], -// [ -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_hifi.fastq.gz", checkIfExists: true), -// [], // no other reads -// [] // no reference genome -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } -// -// then { -// assertAll( -// { assert snapshot( -// process.out.flye_assembly, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } - - test("sarscov2 - illumina - with_reference") { + test("homo_sapiens - illumina - paired_end - with_jump") { when { process { """ input[0] = [ - [ id:'test', single_end:false ], + [ id:'test'], [ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) ], - [], // no jump reads + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test2_germline_1.fq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test2_germline_2.fq.gz", checkIfExists: true) + ], [], // no pacbio [], // no nanopore [], // no other reads - file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true) + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 3600 // jump_mean + input[4] = 200 // jump_stdev + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } + + test("genomeassembler - hybrid - illumina_pacbio") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], + [], // no nanopore + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } + + test("genomeassembler - hybrid - illumina_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], + [], // no other reads + [] // no reference genome + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } + + test("genomeassembler - hybrid - illumina_pacbio_nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_1.fastq.gz", checkIfExists: true), + file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/SR_Col-0_test_data_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/HiFi-Col-0_test_data.fastq.gz", checkIfExists: true)], + [file("https://raw.githubusercontent.com/nf-core/test-datasets/genomeassembler/A_thaliana_Col-0_2mb/ONT-Col-0_test_data.fastq.gz", checkIfExists: true)], + [], // no other reads + [] // no reference genome ] input[1] = 500 // fragment_mean input[2] = 50 // fragment_stdev @@ -272,7 +231,6 @@ nextflow_process { then { assertAll( { assert snapshot( - process.out.contigs, process.out.scaffolds, process.out.script, process.out.log, @@ -283,6 +241,45 @@ nextflow_process { } +// test("sarscov2 - illumina - with_reference") { +// +// when { +// process { +// """ +// input[0] = [ +// [ id:'test'], +// [ +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), +// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) +// ], +// [], // no jump reads +// [], // no pacbio +// [], // no nanopore +// [], // no other reads +// [file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true)] +// ] +// input[1] = 500 // fragment_mean +// input[2] = 50 // fragment_stdev +// input[3] = 0 // jump_mean (not used) +// input[4] = 0 // jump_stdev (not used) +// """ +// } +// } + +// then { +// assertAll( +// { assert snapshot( +// process.out.contigs, +// process.out.scaffolds, +// process.out.script, +// process.out.log, +// process.out.versions_masurca +// ).match() } +// ) +// } +// +// } + test("sarscov2 - illumina - paired_end - stub") { options "-stub" @@ -291,7 +288,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test', single_end:false ], + [ id:'test'], [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index ad490f7042ca..c9dc908555b6 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -17,11 +17,226 @@ ] ], + "timestamp": "2026-03-23T16:33:30.420661493", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-03-23T16:33:30.420661493" + } + }, + "homo_sapiens - illumina - single_end": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ], + "timestamp": "2026-03-24T12:00:30.630209268", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "genomeassembler - hybrid - illumina_pacbio_nanopore": { + "content": [ + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,2efaf8caf4ee23f5aefdd52c872e72de" + ] + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,86b0e10f065a071e96b0b51d4ea1e0ed" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,2ddf4082e9f6c15411bd71b789e08d03" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + ], + "timestamp": "2026-03-24T23:06:29.815457652", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - illumina - paired_end": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ] + ], + "timestamp": "2026-03-23T16:33:21.946468822", + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + } + }, + "sarscov2 - illumina - paired_end - stub": { + "content": [ + { + "config": [ + [ + { + "id": "test" + }, + "test_masurca_config.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "scaffolds": [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "script": [ + [ + { + "id": "test" + }, + "assemble.sh:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_masurca": [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + } + ], + "timestamp": "2026-03-24T21:44:20.998579705", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "genomeassembler - hybrid - illumina_pacbio": { + "content": [ + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,410641699039df2186ead1fd773e8ef6" + ] + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,d0b63ee04399206b0119e0923d98a45b" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,e9e2395995ae101377dda494723e0fb7" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + ], + "timestamp": "2026-03-24T23:01:43.498317838", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "homo_sapiens - illumina - paired_end": { + "content": [ + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,e7578c7d3cbbc2f521cfb8ca723080ab" + ] + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,d13e210a74a167db3def9400dbce80fc" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,cd27376502e314e826200e1de0a7d79e" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] + ], + "timestamp": "2026-03-24T22:51:36.485831994", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2 - hybrid - illumina_nanopore": { "content": [ @@ -38,11 +253,11 @@ ] ], + "timestamp": "2026-03-23T16:39:05.105234025", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:39:05.105234025" + } }, "sarscov2 - illumina - single_end": { "content": [ @@ -85,13 +300,13 @@ ] } ], + "timestamp": "2026-03-20T16:53:14.230197251", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-03-20T16:53:14.230197251" + } }, - "sarscov2 - illumina - paired_end": { + "sarscov2 - hybrid - illumina_pacbio": { "content": [ [ @@ -104,69 +319,93 @@ ], [ - ], - [ - ] ], + "timestamp": "2026-03-23T16:38:57.588039397", "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-03-23T16:33:21.946468822" + "nextflow": "25.10.4" + } }, - "sarscov2 - hybrid - illumina_pacbio": { + "homo_sapiens - illumina - paired_end - with_jump": { "content": [ [ - + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,3beee4f499100edd3f4f02fab8edc1a5" + ] ], [ - + [ + { + "id": "test" + }, + "assemble.sh:md5,c9d61b83d203d2882300164205b79c8a" + ] ], [ - + [ + { + "id": "test" + }, + "test-masurca.log:md5,0b390f436b223beabac17b283b075fc5" + ] ], [ - + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] ], + "timestamp": "2026-03-24T22:59:02.078300748", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:38:57.588039397" + } }, - "sarscov2 - illumina - paired_end - stub": { + "genomeassembler - hybrid - illumina_nanopore": { "content": [ - { - "config": [ - - ], - "contigs": [ - - ], - "flye_assembly": [ - - ], - "log": [ - - ], - "scaffolds": [ - - ], - "script": [ - - ], - "versions_masurca": [ - + [ + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,3d495a73c465ed3b339c746d310d36f4" ] - } + ], + [ + [ + { + "id": "test" + }, + "assemble.sh:md5,9342ce732b095ea5f422070f5d9b601d" + ] + ], + [ + [ + { + "id": "test" + }, + "test-masurca.log:md5,c3ec9fabae6523d9074b40cd9713f959" + ] + ], + [ + [ + "MASURCA", + "masurca", + "4.1.4" + ] + ] ], + "timestamp": "2026-03-24T23:03:29.504211443", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:39:42.421134682" + } }, "sarscov2 - illumina - with_reference": { "content": [ @@ -186,10 +425,10 @@ ] ], + "timestamp": "2026-03-23T16:39:33.587772541", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-03-23T16:39:33.587772541" + } } } \ No newline at end of file From c8f84d0ddba4678b35824d7864a0d92ec311c1b8 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Wed, 25 Mar 2026 18:25:35 +0000 Subject: [PATCH 4/5] still work in progress --- modules/nf-core/masurca/main.nf | 25 +--- modules/nf-core/masurca/meta.yml | 11 +- modules/nf-core/masurca/tests/main.nf.test | 75 ++++++----- .../nf-core/masurca/tests/main.nf.test.snap | 124 ++++++++++-------- 4 files changed, 116 insertions(+), 119 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index 7c1fa4245abd..f210c0b230ee 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -1,28 +1,11 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules/nf-core/ -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process MASURCA { tag "$meta.id" label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/cf/cf6402ed20c3b089ab88cd8884ddace90693501453a515f9188ae681e8ca8556/data': - 'community.wave.seqera.io/library/masurca:4.1.4--d05ef74c4881d55c' }" + 'oras://community.wave.seqera.io/library/coreutils_file_masurca_mummer_perl:73ce913377915362': + 'community.wave.seqera.io/library/coreutils_file_masurca_mummer_perl:93f95b0aad1db22b' }" input: tuple val(meta), path(illumina), path(jump), path(pacbio), path(nanopore), path(other_reads), path(reference_genome) @@ -47,8 +30,8 @@ process MASURCA { def prefix = task.ext.prefix ?: "${meta.id}" //get input reads with absolute paths - illumina are mandatory, jump/pacbio/nanopore are optional - def illumina_reads = illumina.collect { it.toRealPath() }.join(' ') - def jump_reads = jump ? jump.collect { it.toRealPath() }.join(' ') : "" + def illumina_reads = [illumina].flatten().collect { it.toRealPath() }.join(' ') + def jump_reads = jump ? [jump].flatten().collect { it.toRealPath() }.join(' ') : "" def pacbio_file = pacbio ? pacbio.toRealPath() : "" def nanopore_file = nanopore ? nanopore.toRealPath() : "" def other_reads_file = other_reads ? other_reads.toRealPath() : "" diff --git a/modules/nf-core/masurca/meta.yml b/modules/nf-core/masurca/meta.yml index 45a72409d119..69496bb5d4e6 100644 --- a/modules/nf-core/masurca/meta.yml +++ b/modules/nf-core/masurca/meta.yml @@ -1,19 +1,18 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -# # TODO nf-core: Add a description of the module and list keywords name: "masurca" -description: write your description here +description: The MaSuRCA (Maryland Super Read Cabog Assembler) genome assembly and analysis toolkit keywords: - - sort - - example + - denovo + - assembly + - debruijn - genomics tools: - ## TODO nf-core: Add a description and other details for the software below - "masurca": description: "MaSuRCA (Maryland Super-Read Celera Assembler) genome assembly software." homepage: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" documentation: "https://github.com/alekseyzimin/masurca/blob/v4.1.4/README.md" tool_dev_url: "https://github.com/alekseyzimin/masurca" - doi: "" + doi: "10.1101/gr.213405.116" licence: ["GPL v3"] identifier: biotools:masurca diff --git a/modules/nf-core/masurca/tests/main.nf.test b/modules/nf-core/masurca/tests/main.nf.test index 1d6a1afe517d..1f9d8f181c7e 100644 --- a/modules/nf-core/masurca/tests/main.nf.test +++ b/modules/nf-core/masurca/tests/main.nf.test @@ -241,44 +241,43 @@ nextflow_process { } -// test("sarscov2 - illumina - with_reference") { -// -// when { -// process { -// """ -// input[0] = [ -// [ id:'test'], -// [ -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), -// file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) -// ], -// [], // no jump reads -// [], // no pacbio -// [], // no nanopore -// [], // no other reads -// [file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true)] -// ] -// input[1] = 500 // fragment_mean -// input[2] = 50 // fragment_stdev -// input[3] = 0 // jump_mean (not used) -// input[4] = 0 // jump_stdev (not used) -// """ -// } -// } - -// then { -// assertAll( -// { assert snapshot( -// process.out.contigs, -// process.out.scaffolds, -// process.out.script, -// process.out.log, -// process.out.versions_masurca -// ).match() } -// ) -// } -// -// } + test("sarscov2 - illumina - with_reference") { + + when { + process { + """ + input[0] = [ + [ id:'test'], + [ + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ], + [], // no jump reads + [], // no pacbio + [], // no nanopore + [], // no other reads + [file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.gz", checkIfExists: true)] + ] + input[1] = 500 // fragment_mean + input[2] = 50 // fragment_stdev + input[3] = 0 // jump_mean (not used) + input[4] = 0 // jump_stdev (not used) + """ + } + } + + then { + assertAll( + { assert snapshot( + process.out.scaffolds, + process.out.script, + process.out.log, + process.out.versions_masurca + ).match() } + ) + } + + } test("sarscov2 - illumina - paired_end - stub") { diff --git a/modules/nf-core/masurca/tests/main.nf.test.snap b/modules/nf-core/masurca/tests/main.nf.test.snap index c9dc908555b6..d02445871f0e 100644 --- a/modules/nf-core/masurca/tests/main.nf.test.snap +++ b/modules/nf-core/masurca/tests/main.nf.test.snap @@ -17,32 +17,51 @@ ] ], - "timestamp": "2026-03-23T16:33:30.420661493", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - } + }, + "timestamp": "2026-03-23T16:33:30.420661493" }, "homo_sapiens - illumina - single_end": { "content": [ [ - + [ + { + "id": "test" + }, + "primary.genome.scf.fasta:md5,d3365e7157fb041cb4006a2d11001762" + ] ], [ - + [ + { + "id": "test" + }, + "assemble.sh:md5,72fb3063dc7370a4b2a95b93234bcf0e" + ] ], [ - + [ + { + "id": "test" + }, + "test-masurca.log:md5,2a5a4548af7550a71e43cc82068f415f" + ] ], [ - + [ + "MASURCA", + "masurca", + "4.1.4" + ] ] ], - "timestamp": "2026-03-24T12:00:30.630209268", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T17:52:08.347930423" }, "genomeassembler - hybrid - illumina_pacbio_nanopore": { "content": [ @@ -51,7 +70,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,2efaf8caf4ee23f5aefdd52c872e72de" + "primary.genome.scf.fasta:md5,b4e3d34bb6c27bc38a5e5aafc9367bb1" ] ], [ @@ -59,7 +78,7 @@ { "id": "test" }, - "assemble.sh:md5,86b0e10f065a071e96b0b51d4ea1e0ed" + "assemble.sh:md5,1e0437f3c91ef409b9b743cc4dc4c3f5" ] ], [ @@ -67,7 +86,7 @@ { "id": "test" }, - "test-masurca.log:md5,2ddf4082e9f6c15411bd71b789e08d03" + "test-masurca.log:md5,ef2bb59b6dd640565bad9825c540b032" ] ], [ @@ -78,11 +97,11 @@ ] ] ], - "timestamp": "2026-03-24T23:06:29.815457652", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T18:05:57.515112182" }, "sarscov2 - illumina - paired_end": { "content": [ @@ -102,11 +121,11 @@ ] ], - "timestamp": "2026-03-23T16:33:21.946468822", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - } + }, + "timestamp": "2026-03-23T16:33:21.946468822" }, "sarscov2 - illumina - paired_end - stub": { "content": [ @@ -152,11 +171,11 @@ ] } ], - "timestamp": "2026-03-24T21:44:20.998579705", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-24T21:44:20.998579705" }, "genomeassembler - hybrid - illumina_pacbio": { "content": [ @@ -165,7 +184,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,410641699039df2186ead1fd773e8ef6" + "primary.genome.scf.fasta:md5,f15d1bf333065e05cd96d46fe1c45f9f" ] ], [ @@ -173,7 +192,7 @@ { "id": "test" }, - "assemble.sh:md5,d0b63ee04399206b0119e0923d98a45b" + "assemble.sh:md5,dfe0ecc3e08715e5fdca93eed0c8f3bd" ] ], [ @@ -181,7 +200,7 @@ { "id": "test" }, - "test-masurca.log:md5,e9e2395995ae101377dda494723e0fb7" + "test-masurca.log:md5,e2f44ba808dfd6d759b97fb177d02049" ] ], [ @@ -192,11 +211,11 @@ ] ] ], - "timestamp": "2026-03-24T23:01:43.498317838", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T18:01:40.129969288" }, "homo_sapiens - illumina - paired_end": { "content": [ @@ -205,7 +224,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,e7578c7d3cbbc2f521cfb8ca723080ab" + "primary.genome.scf.fasta:md5,f8e715fa707cf39603cf09b700666d57" ] ], [ @@ -213,7 +232,7 @@ { "id": "test" }, - "assemble.sh:md5,d13e210a74a167db3def9400dbce80fc" + "assemble.sh:md5,c4252fd02352bc365b95ac090c8bda6c" ] ], [ @@ -221,7 +240,7 @@ { "id": "test" }, - "test-masurca.log:md5,cd27376502e314e826200e1de0a7d79e" + "test-masurca.log:md5,6e9580c582a91f814b9870238e9d80ca" ] ], [ @@ -232,11 +251,11 @@ ] ] ], - "timestamp": "2026-03-24T22:51:36.485831994", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T17:53:33.025513309" }, "sarscov2 - hybrid - illumina_nanopore": { "content": [ @@ -253,11 +272,11 @@ ] ], - "timestamp": "2026-03-23T16:39:05.105234025", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-23T16:39:05.105234025" }, "sarscov2 - illumina - single_end": { "content": [ @@ -300,11 +319,11 @@ ] } ], - "timestamp": "2026-03-20T16:53:14.230197251", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - } + }, + "timestamp": "2026-03-20T16:53:14.230197251" }, "sarscov2 - hybrid - illumina_pacbio": { "content": [ @@ -321,11 +340,11 @@ ] ], - "timestamp": "2026-03-23T16:38:57.588039397", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-23T16:38:57.588039397" }, "homo_sapiens - illumina - paired_end - with_jump": { "content": [ @@ -334,7 +353,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,3beee4f499100edd3f4f02fab8edc1a5" + "primary.genome.scf.fasta:md5,cae25f87f26c09f972eae5aa1cc65617" ] ], [ @@ -342,7 +361,7 @@ { "id": "test" }, - "assemble.sh:md5,c9d61b83d203d2882300164205b79c8a" + "assemble.sh:md5,a14782d876cbb76684e7599489a84b0b" ] ], [ @@ -350,7 +369,7 @@ { "id": "test" }, - "test-masurca.log:md5,0b390f436b223beabac17b283b075fc5" + "test-masurca.log:md5,7ed0e3502298ef267e4fc61758edb564" ] ], [ @@ -361,11 +380,11 @@ ] ] ], - "timestamp": "2026-03-24T22:59:02.078300748", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T17:59:29.231599584" }, "genomeassembler - hybrid - illumina_nanopore": { "content": [ @@ -374,7 +393,7 @@ { "id": "test" }, - "primary.genome.scf.fasta:md5,3d495a73c465ed3b339c746d310d36f4" + "primary.genome.scf.fasta:md5,2181552d037a61458d322f732fe814dd" ] ], [ @@ -382,7 +401,7 @@ { "id": "test" }, - "assemble.sh:md5,9342ce732b095ea5f422070f5d9b601d" + "assemble.sh:md5,b8268c6dcb9d7b36b7312967359a73b5" ] ], [ @@ -390,7 +409,7 @@ { "id": "test" }, - "test-masurca.log:md5,c3ec9fabae6523d9074b40cd9713f959" + "test-masurca.log:md5,635979d40fc9bc1e159a10185637bcbe" ] ], [ @@ -401,11 +420,11 @@ ] ] ], - "timestamp": "2026-03-24T23:03:29.504211443", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T18:03:32.971062838" }, "sarscov2 - illumina - with_reference": { "content": [ @@ -420,15 +439,12 @@ ], [ - ], - [ - ] ], - "timestamp": "2026-03-23T16:39:33.587772541", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - } + }, + "timestamp": "2026-03-25T15:27:36.181316757" } } \ No newline at end of file From d6e1274896a172ce22e5193dad7f5908f99111f5 Mon Sep 17 00:00:00 2001 From: LiaOb21 Date: Wed, 25 Mar 2026 18:26:08 +0000 Subject: [PATCH 5/5] still work in progress --- modules/nf-core/masurca/main.nf | 7 ------- 1 file changed, 7 deletions(-) diff --git a/modules/nf-core/masurca/main.nf b/modules/nf-core/masurca/main.nf index f210c0b230ee..468928e35d46 100644 --- a/modules/nf-core/masurca/main.nf +++ b/modules/nf-core/masurca/main.nf @@ -34,7 +34,6 @@ process MASURCA { def jump_reads = jump ? [jump].flatten().collect { it.toRealPath() }.join(' ') : "" def pacbio_file = pacbio ? pacbio.toRealPath() : "" def nanopore_file = nanopore ? nanopore.toRealPath() : "" - def other_reads_file = other_reads ? other_reads.toRealPath() : "" def reference_genome_file = reference_genome ? reference_genome.toRealPath() : "" // Configuration parameters with defaults from task.ext @@ -74,12 +73,6 @@ process MASURCA { echo "NANOPORE=${nanopore_file}" >> ${prefix}_masurca_config.txt fi - # Other reads (optional) - Sanger, 454, etc. - if [ -n "${other_reads_file}" ]; then - echo "#Other reads (Sanger, 454, etc) one frg file, concatenate your frg files into one if you have many" >> ${prefix}_masurca_config.txt - echo "OTHER=${other_reads_file}" >> ${prefix}_masurca_config.txt - fi - # Reference genome (optional) - for synteny-assisted assembly if [ -n "${reference_genome_file}" ]; then echo "#synteny-assisted assembly, concatenate all reference genomes into one reference.fa; works for Illumina-only data" >> ${prefix}_masurca_config.txt