From 73a15fef58a02877e7946aa6ab0e89dd8548384f Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 25 Mar 2026 13:22:43 +0000 Subject: [PATCH 01/15] Update bioawk --- modules/nf-core/bioawk/main.nf | 27 +++++-- modules/nf-core/bioawk/meta.yml | 50 ++++++++---- modules/nf-core/bioawk/tests/main.nf.test | 31 +++++++- .../nf-core/bioawk/tests/main.nf.test.snap | 77 +++++++++++++++++-- modules/nf-core/bioawk/tests/nextflow.config | 2 +- .../tests/nextflow_telomere_check.config | 6 ++ 6 files changed, 159 insertions(+), 34 deletions(-) create mode 100644 modules/nf-core/bioawk/tests/nextflow_telomere_check.config diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 70780b3db8e4..211bf78ffe0d 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -1,5 +1,5 @@ process BIOAWK { - tag "$meta.id" + tag "${meta.id}" label 'process_single' conda "${moduleDir}/environment.yml" @@ -9,9 +9,12 @@ process BIOAWK { input: tuple val(meta), path(input) + val suffix + val zip_bool output: - tuple val(meta), path("*.gz"), emit: output + tuple val(meta), path("${file_output}"), optional: true, emit: output + tuple val(meta), path("*.gz"), optional: true, emit: gz_output tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, topic: versions // WARN: Version information not provided by tool on CLI. Please update version string above when bumping container versions. @@ -19,16 +22,26 @@ process BIOAWK { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - if ("${input}" == "${prefix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate." + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + file_output = "${prefix}.${suffix}" + if ("${input}" == "${file_output}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate." """ bioawk \ $args \ $input \ - > ${prefix} + > ${file_output} - gzip ${prefix} + if [ "${zip_bool}" = "true" ]; then + gzip "${file_output}" + fi + + """ + + stub: + """ + touch ${file_output} + echo "" > ${file_output}.gz """ } diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml index 5bea6172672b..8e3a70df608b 100644 --- a/modules/nf-core/bioawk/meta.yml +++ b/modules/nf-core/bioawk/meta.yml @@ -1,7 +1,6 @@ -schema_version: 1.1.0 name: "bioawk" -description: Bioawk is an extension to Brian Kernighan's awk, adding the support of several common biological data formats. - +description: Bioawk is an extension to Brian Kernighan's awk, adding the support + of several common biological data formats. keywords: - bioawk - fastq @@ -9,7 +8,6 @@ keywords: - sam - file manipulation - awk - tools: - bioawk: description: BWK awk modified for biological data @@ -17,9 +15,9 @@ tools: documentation: https://github.com/lh3/bioawk tool_dev_url: https://github.com/lh3/bioawk licence: - - Free software license (https://github.com/lh3/bioawk/blob/master/README.awk#L1) + - Free software license + (https://github.com/lh3/bioawk/blob/master/README.awk#L1) identifier: "" - input: - - meta: type: map @@ -31,9 +29,28 @@ input: description: | Input biological sequence file (optionally gzipped) to be manipulated via the program specified in `$args`. pattern: "*.{bed,gff,sam,vcf,fastq,fasta,tab,bed.gz,gff.gz,sam.gz,vcf.gz,fastq.gz,fasta.gz,tab.gz}" - + ontologies: + - edam: http://edamontology.org/format_1930 + - edam: http://edamontology.org/format_3475 + - suffix: + type: string + description: The suffix to add to the output file name. + - zip_bool: + type: boolean + description: Whether to gzip the output file. + pattern: "true|false" output: output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${file_output}: + type: file + description: Manipulated version of the input sequence file. + ontologies: [] + gz_output: - - meta: type: map description: | @@ -43,32 +60,31 @@ output: type: file description: Manipulated and gzipped version of the input sequence file. pattern: "*.gz" - + ontologies: + - edam: http://edamontology.org/format_3989 versions_bioawk: - - - "${task.process}": + - - ${task.process}: type: string description: The name of the process - - "bioawk": + - bioawk: type: string description: The name of the tool - "1.0": type: string - description: The version of the tool - + description: The expression to obtain the version of the tool topics: versions: - - - "${task.process}": + - - ${task.process}: type: string description: The name of the process - - "bioawk": + - bioawk: type: string description: The name of the tool - "1.0": type: string - description: The version of the tool - + description: The expression to obtain the version of the tool authors: - "@jfy133" - maintainers: - "@jfy133" +schema_version: 1.1.0 diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index 270ff1ef3ec9..be0aea9b65dd 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -4,13 +4,13 @@ nextflow_process { name "Test Process BIOAWK" script "../main.nf" process "BIOAWK" - config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "bioawk" - test("test-bioawk") { + test("fasta bioawk fasta.gz") { + config "./nextflow.config" when { process { @@ -19,6 +19,33 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = "fa" + input[2] = true + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta bioawk tsv") { + config "./nextflow_telomere_check.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file('/lustre/scratch124/tol/teams/tola/users/dp24/nf-modules/modules/nf-core/bioawk/tests/telomere.fasta', checkIfExists: true) + ] + input[1] = "tsv" + input[2] = false """ } diff --git a/modules/nf-core/bioawk/tests/main.nf.test.snap b/modules/nf-core/bioawk/tests/main.nf.test.snap index fa9b59305fc1..908adbde85a0 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test.snap +++ b/modules/nf-core/bioawk/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "test-bioawk": { + "fasta bioawk fasta.gz": { "content": [ { "0": [ @@ -12,7 +12,17 @@ ] ], "1": [ - "versions.yml:md5,5fe88e58a71f10551df56518c35ba91a" + + ], + "2": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] + ], + "gz_output": [ + ], "output": [ [ @@ -23,15 +33,68 @@ "sample_1.fa.gz:md5,b558dd15d8940373a032a827d490e693" ] ], - "versions": [ - "versions.yml:md5,5fe88e58a71f10551df56518c35ba91a" + "versions_bioawk": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-25T12:35:30.509942773" + }, + "fasta bioawk tsv": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a" + ] + ], + "2": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] + ], + "gz_output": [ + [ + { + "id": "test", + "single_end": false + }, + "telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a" + ] + ], + "output": [ + + ], + "versions_bioawk": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2024-08-28T10:24:46.397249" + "timestamp": "2026-03-25T12:39:43.207782527" } } \ No newline at end of file diff --git a/modules/nf-core/bioawk/tests/nextflow.config b/modules/nf-core/bioawk/tests/nextflow.config index 5ef017d97353..d8c4757d0ace 100644 --- a/modules/nf-core/bioawk/tests/nextflow.config +++ b/modules/nf-core/bioawk/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: BIOAWK { ext.args = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'" - ext.prefix = "sample_1.fa" + ext.prefix = "sample_1" } } diff --git a/modules/nf-core/bioawk/tests/nextflow_telomere_check.config b/modules/nf-core/bioawk/tests/nextflow_telomere_check.config new file mode 100644 index 000000000000..c39bc2889b3d --- /dev/null +++ b/modules/nf-core/bioawk/tests/nextflow_telomere_check.config @@ -0,0 +1,6 @@ +process { + withName: BIOAWK { + ext.args = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct < 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\t%d\t%.2f\t%s\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s}\'" + ext.prefix = "telomere_summary" + } +} From 84217018313b088b104ccc5c178e62a75953f4ed Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 25 Mar 2026 14:27:39 +0000 Subject: [PATCH 02/15] Updated tests now that dataset is accepted --- modules/nf-core/bioawk/tests/main.nf.test | 2 +- .../nf-core/bioawk/tests/main.nf.test.snap | 28 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index be0aea9b65dd..4f2d8b54353b 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -42,7 +42,7 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:false ], // meta map - file('/lustre/scratch124/tol/teams/tola/users/dp24/nf-modules/modules/nf-core/bioawk/tests/telomere.fasta', checkIfExists: true) + file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true) ] input[1] = "tsv" input[2] = false diff --git a/modules/nf-core/bioawk/tests/main.nf.test.snap b/modules/nf-core/bioawk/tests/main.nf.test.snap index 908adbde85a0..82756ab54748 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test.snap +++ b/modules/nf-core/bioawk/tests/main.nf.test.snap @@ -3,6 +3,9 @@ "content": [ { "0": [ + + ], + "1": [ [ { "id": "test", @@ -10,9 +13,6 @@ }, "sample_1.fa.gz:md5,b558dd15d8940373a032a827d490e693" ] - ], - "1": [ - ], "2": [ [ @@ -22,9 +22,6 @@ ] ], "gz_output": [ - - ], - "output": [ [ { "id": "test", @@ -32,6 +29,9 @@ }, "sample_1.fa.gz:md5,b558dd15d8940373a032a827d490e693" ] + ], + "output": [ + ], "versions_bioawk": [ [ @@ -46,15 +46,12 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-25T12:35:30.509942773" + "timestamp": "2026-03-25T14:23:36.455899569" }, "fasta bioawk tsv": { "content": [ { "0": [ - - ], - "1": [ [ { "id": "test", @@ -62,6 +59,9 @@ }, "telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a" ] + ], + "1": [ + ], "2": [ [ @@ -71,6 +71,9 @@ ] ], "gz_output": [ + + ], + "output": [ [ { "id": "test", @@ -78,9 +81,6 @@ }, "telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a" ] - ], - "output": [ - ], "versions_bioawk": [ [ @@ -95,6 +95,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-25T12:39:43.207782527" + "timestamp": "2026-03-25T14:23:43.686125173" } } \ No newline at end of file From 902be18f97b1a98fce5cfa3b79d8182e80dbd5a9 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 25 Mar 2026 20:41:10 +0000 Subject: [PATCH 03/15] Update to bioawk based on review --- modules/nf-core/bioawk/main.nf | 21 +++++---------- modules/nf-core/bioawk/meta.yml | 27 ++++++------------- modules/nf-core/bioawk/tests/main.nf.test | 18 ++++++++----- .../nf-core/bioawk/tests/main.nf.test.snap | 18 +++---------- modules/nf-core/bioawk/tests/nextflow.config | 4 +-- .../tests/nextflow_telomere_check.config | 6 ----- 6 files changed, 32 insertions(+), 62 deletions(-) delete mode 100644 modules/nf-core/bioawk/tests/nextflow_telomere_check.config diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 211bf78ffe0d..0ffe5904e542 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -9,12 +9,10 @@ process BIOAWK { input: tuple val(meta), path(input) - val suffix - val zip_bool + val output_file_extension output: - tuple val(meta), path("${file_output}"), optional: true, emit: output - tuple val(meta), path("*.gz"), optional: true, emit: gz_output + tuple val(meta), path("*.${output_file_extension}"), emit: output tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, topic: versions // WARN: Version information not provided by tool on CLI. Please update version string above when bumping container versions. @@ -24,19 +22,14 @@ process BIOAWK { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - file_output = "${prefix}.${suffix}" - if ("${input}" == "${file_output}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate." + if ("${input}" == "${prefix}.${output_file_extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate." + def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : "" """ bioawk \ - $args \ - $input \ - > ${file_output} - - if [ "${zip_bool}" = "true" ]; then - gzip "${file_output}" - fi - + $args \ + $input \ + ${compress_output} > ${prefix}.${output_file_extension} """ stub: diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml index 8e3a70df608b..bfc3f9b215d4 100644 --- a/modules/nf-core/bioawk/meta.yml +++ b/modules/nf-core/bioawk/meta.yml @@ -30,15 +30,16 @@ input: Input biological sequence file (optionally gzipped) to be manipulated via the program specified in `$args`. pattern: "*.{bed,gff,sam,vcf,fastq,fasta,tab,bed.gz,gff.gz,sam.gz,vcf.gz,fastq.gz,fasta.gz,tab.gz}" ontologies: - - edam: http://edamontology.org/format_1930 + - edam: "http://edamontology.org/format_3003" + - edam: "http://edamontology.org/format_3016" + - edam: "http://edamontology.org/format_3989" + - edam: "http://edamontology.org/format_1975" + - edam: "http://edamontology.org/format_1929" + - edam: "http://edamontology.org/format_1930" - edam: http://edamontology.org/format_3475 - - suffix: + - output_file_extension: type: string description: The suffix to add to the output file name. - - zip_bool: - type: boolean - description: Whether to gzip the output file. - pattern: "true|false" output: output: - - meta: @@ -46,22 +47,10 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - ${file_output}: + - "*.${output_file_extension}": type: file description: Manipulated version of the input sequence file. ontologies: [] - gz_output: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.gz": - type: file - description: Manipulated and gzipped version of the input sequence file. - pattern: "*.gz" - ontologies: - - edam: http://edamontology.org/format_3989 versions_bioawk: - - ${task.process}: type: string diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index 4f2d8b54353b..ae3420cdfafa 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -4,6 +4,7 @@ nextflow_process { name "Test Process BIOAWK" script "../main.nf" process "BIOAWK" + config "./nextflow.config" tag "modules" tag "modules_nfcore" @@ -13,15 +14,18 @@ nextflow_process { config "./nextflow.config" when { + params { + bioawk_command = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'" + bioawk_prefix = "sample_1" + } + process { """ input[0] = [ [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = "fa" - input[2] = true - + input[1] = "fa.gz" """ } } @@ -35,9 +39,13 @@ nextflow_process { } test("fasta bioawk tsv") { - config "./nextflow_telomere_check.config" when { + params { + bioawk_command = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct < 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\t%d\t%.2f\t%s\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s}\'" + bioawk_prefix = "telomere_summary" + } + process { """ input[0] = [ @@ -45,8 +53,6 @@ nextflow_process { file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true) ] input[1] = "tsv" - input[2] = false - """ } } diff --git a/modules/nf-core/bioawk/tests/main.nf.test.snap b/modules/nf-core/bioawk/tests/main.nf.test.snap index 82756ab54748..c4f99a95bf0b 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test.snap +++ b/modules/nf-core/bioawk/tests/main.nf.test.snap @@ -3,9 +3,6 @@ "content": [ { "0": [ - - ], - "1": [ [ { "id": "test", @@ -14,14 +11,14 @@ "sample_1.fa.gz:md5,b558dd15d8940373a032a827d490e693" ] ], - "2": [ + "1": [ [ "BIOAWK", "bioawk", "1.0" ] ], - "gz_output": [ + "output": [ [ { "id": "test", @@ -29,9 +26,6 @@ }, "sample_1.fa.gz:md5,b558dd15d8940373a032a827d490e693" ] - ], - "output": [ - ], "versions_bioawk": [ [ @@ -61,17 +55,11 @@ ] ], "1": [ - - ], - "2": [ [ "BIOAWK", "bioawk", "1.0" ] - ], - "gz_output": [ - ], "output": [ [ @@ -97,4 +85,4 @@ }, "timestamp": "2026-03-25T14:23:43.686125173" } -} \ No newline at end of file +} diff --git a/modules/nf-core/bioawk/tests/nextflow.config b/modules/nf-core/bioawk/tests/nextflow.config index d8c4757d0ace..2ee3ea9d82bb 100644 --- a/modules/nf-core/bioawk/tests/nextflow.config +++ b/modules/nf-core/bioawk/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: BIOAWK { - ext.args = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'" - ext.prefix = "sample_1" + ext.args = "${params.bioawk_command}" + ext.prefix = "${params.bioawk_prefix}" } } diff --git a/modules/nf-core/bioawk/tests/nextflow_telomere_check.config b/modules/nf-core/bioawk/tests/nextflow_telomere_check.config deleted file mode 100644 index c39bc2889b3d..000000000000 --- a/modules/nf-core/bioawk/tests/nextflow_telomere_check.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - withName: BIOAWK { - ext.args = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct < 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\t%d\t%.2f\t%s\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s}\'" - ext.prefix = "telomere_summary" - } -} From 611d76464b4c3d45a2abf2e6442c2838810aa180 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 25 Mar 2026 20:47:30 +0000 Subject: [PATCH 04/15] Update to bioawk test and snapshot based on review --- modules/nf-core/bioawk/tests/main.nf.test | 2 +- modules/nf-core/bioawk/tests/main.nf.test.snap | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index ae3420cdfafa..d0aebb8e2313 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -42,7 +42,7 @@ nextflow_process { when { params { - bioawk_command = "-c fastx \'{s = toupper(\$seq); copy_s = s; g = gsub(/G/, \"\", s); pct = 100*g/length(copy_s); rev = (pct < 30); out = rev ? revcomp(\$seq) : \$seq; printf \"%s\t%d\t%.2f\t%s\t%s\\n\", out, g, pct, (rev ? \"true\" : \"false\"), copy_s}\'" + bioawk_command = "-c gfastx \'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'" bioawk_prefix = "telomere_summary" } diff --git a/modules/nf-core/bioawk/tests/main.nf.test.snap b/modules/nf-core/bioawk/tests/main.nf.test.snap index c4f99a95bf0b..cc354b1a3ad5 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test.snap +++ b/modules/nf-core/bioawk/tests/main.nf.test.snap @@ -51,7 +51,7 @@ "id": "test", "single_end": false }, - "telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a" + "telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1" ] ], "1": [ @@ -67,7 +67,7 @@ "id": "test", "single_end": false }, - "telomere_summary.tsv:md5,20facddd524fd8f6c0c03505f0be3e7a" + "telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1" ] ], "versions_bioawk": [ @@ -83,6 +83,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-25T14:23:43.686125173" + "timestamp": "2026-03-25T20:46:16.217744013" } -} +} \ No newline at end of file From 2b056db98600370e95d66aaefe99dd7d820d063e Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 25 Mar 2026 20:52:06 +0000 Subject: [PATCH 05/15] Linting fix --- modules/nf-core/bioawk/main.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 0ffe5904e542..d99705810393 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -34,7 +34,6 @@ process BIOAWK { stub: """ - touch ${file_output} - echo "" > ${file_output}.gz + echo "" > "${prefix}.${output_file_extension}" """ } From 9d264f86ddc23cc0f61caf7f23a631d7e86b2830 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 25 Mar 2026 20:55:19 +0000 Subject: [PATCH 06/15] aaaaah prefix --- modules/nf-core/bioawk/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index d99705810393..1506fba72faf 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -33,6 +33,7 @@ process BIOAWK { """ stub: + def prefix = task.ext.prefix ?: "${meta.id}" """ echo "" > "${prefix}.${output_file_extension}" """ From e119506e099446325a066b03797dc016f9c72104 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Thu, 26 Mar 2026 09:53:25 +0000 Subject: [PATCH 07/15] Apply suggestions from code review Co-authored-by: Jim Downie <19718667+prototaxites@users.noreply.github.com> --- modules/nf-core/bioawk/main.nf | 4 ++-- modules/nf-core/bioawk/meta.yml | 14 +++++++------- modules/nf-core/bioawk/tests/main.nf.test | 1 - 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 1506fba72faf..1cce5f9a4360 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -13,8 +13,8 @@ process BIOAWK { output: tuple val(meta), path("*.${output_file_extension}"), emit: output - tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, topic: versions - // WARN: Version information not provided by tool on CLI. Please update version string above when bumping container versions. +// WARN: Version information not provided by tool on CLI. Please update version string above when bumping container versions. +tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml index bfc3f9b215d4..a5e55b10dd7f 100644 --- a/modules/nf-core/bioawk/meta.yml +++ b/modules/nf-core/bioawk/meta.yml @@ -30,13 +30,13 @@ input: Input biological sequence file (optionally gzipped) to be manipulated via the program specified in `$args`. pattern: "*.{bed,gff,sam,vcf,fastq,fasta,tab,bed.gz,gff.gz,sam.gz,vcf.gz,fastq.gz,fasta.gz,tab.gz}" ontologies: - - edam: "http://edamontology.org/format_3003" - - edam: "http://edamontology.org/format_3016" - - edam: "http://edamontology.org/format_3989" - - edam: "http://edamontology.org/format_1975" - - edam: "http://edamontology.org/format_1929" - - edam: "http://edamontology.org/format_1930" - - edam: http://edamontology.org/format_3475 + - edam: http://edamontology.org/format_3003 # BED + - edam: http://edamontology.org/format_3016 # VCF + - edam: http://edamontology.org/format_3989 # GZIP + - edam: http://edamontology.org/format_1975 # GFF3 + - edam: http://edamontology.org/format_1929 # FASTA + - edam: http://edamontology.org/format_1930 # FASTQ + - edam: http://edamontology.org/format_3475 # TSV - output_file_extension: type: string description: The suffix to add to the output file name. diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index d0aebb8e2313..a55c5ff4d97c 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -11,7 +11,6 @@ nextflow_process { tag "bioawk" test("fasta bioawk fasta.gz") { - config "./nextflow.config" when { params { From 93f9c4b8263d7056b1d76470e9cc02d6f207d901 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Thu, 26 Mar 2026 10:02:49 +0000 Subject: [PATCH 08/15] Update modules/nf-core/bioawk/tests/nextflow.config Co-authored-by: Jim Downie <19718667+prototaxites@users.noreply.github.com> --- modules/nf-core/bioawk/tests/nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/bioawk/tests/nextflow.config b/modules/nf-core/bioawk/tests/nextflow.config index 2ee3ea9d82bb..84a10f72ffc9 100644 --- a/modules/nf-core/bioawk/tests/nextflow.config +++ b/modules/nf-core/bioawk/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: BIOAWK { - ext.args = "${params.bioawk_command}" - ext.prefix = "${params.bioawk_prefix}" + ext.args = { "${params.bioawk_command}" } + ext.prefix = { "${params.bioawk_prefix}" } } } From 834a2f8edfa5cf4448563f931e42ea26f2560ae0 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 26 Mar 2026 10:13:56 +0000 Subject: [PATCH 09/15] SPELLING MISTAKE --- modules/nf-core/bioawk/main.nf | 10 +++++----- modules/nf-core/bioawk/tests/main.nf.test | 2 +- modules/nf-core/bioawk/tests/main.nf.test.snap | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 1cce5f9a4360..503701b6905c 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -13,8 +13,8 @@ process BIOAWK { output: tuple val(meta), path("*.${output_file_extension}"), emit: output -// WARN: Version information not provided by tool on CLI. Please update version string above when bumping container versions. -tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, topic: versions + // WARN: Version information not provided by tool on CLI. Please update version string above when bumping container versions. + tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, topic: versions when: task.ext.when == null || task.ext.when @@ -26,9 +26,9 @@ tuple val("${task.process}"), val('bioawk'), val("1.0"), emit: versions_bioawk, def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : "" """ - bioawk \ - $args \ - $input \ + bioawk \\ + $args \\ + $input \\ ${compress_output} > ${prefix}.${output_file_extension} """ diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index a55c5ff4d97c..d85a20cb5111 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -41,7 +41,7 @@ nextflow_process { when { params { - bioawk_command = "-c gfastx \'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'" + bioawk_command = "-c fastx \'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'" bioawk_prefix = "telomere_summary" } diff --git a/modules/nf-core/bioawk/tests/main.nf.test.snap b/modules/nf-core/bioawk/tests/main.nf.test.snap index cc354b1a3ad5..c02a080d7edd 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test.snap +++ b/modules/nf-core/bioawk/tests/main.nf.test.snap @@ -83,6 +83,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-25T20:46:16.217744013" + "timestamp": "2026-03-26T10:13:18.610496559" } } \ No newline at end of file From 7f67682ee1968bd9712287ff2b68dd37d0990ad5 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Thu, 26 Mar 2026 10:25:35 +0000 Subject: [PATCH 10/15] Update modules/nf-core/bioawk/main.nf Co-authored-by: Jim Downie <19718667+prototaxites@users.noreply.github.com> --- modules/nf-core/bioawk/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 503701b6905c..893dfb626452 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -34,7 +34,8 @@ process BIOAWK { stub: def prefix = task.ext.prefix ?: "${meta.id}" + def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : "" """ - echo "" > "${prefix}.${output_file_extension}" + echo "" ${compress_output} > "${prefix}.${output_file_extension}" """ } From 63c5ad299bf86242efd98935b8bc91669bef76b2 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 26 Mar 2026 16:40:11 +0000 Subject: [PATCH 11/15] Update bioawk based on discussions --- modules/nf-core/bioawk/main.nf | 22 ++++++++++++++------ modules/nf-core/bioawk/meta.yml | 17 +++++++++++++++ modules/nf-core/bioawk/tests/main.nf.test | 14 +++++++++---- modules/nf-core/bioawk/tests/nextflow.config | 2 +- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 893dfb626452..e3a919acad9d 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -9,6 +9,9 @@ process BIOAWK { input: tuple val(meta), path(input) + path(program_file) + val(disable_redirect_output) + val bioawk_extension val output_file_extension output: @@ -21,21 +24,28 @@ process BIOAWK { script: def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + program = program_file ? "-f ${program_file}" : "${args2}" + awk_ext = bioawk_extension ? "-c ${bioawk_extension}" : "" def prefix = task.ext.prefix ?: "${meta.id}" + output_cmd = output_file_extension.endsWith("gz") ? "| gzip > ${prefix}.${output_file_extension}" : "> ${prefix}.${output_file_extension}" + output = disable_redirect_output ? "" : output_cmd + if ("${input}" == "${prefix}.${output_file_extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate." - def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : "" """ bioawk \\ - $args \\ - $input \\ - ${compress_output} > ${prefix}.${output_file_extension} + ${awk_ext} \\ + ${args} \\ + ${program} \\ + ${input} \\ + ${output} """ stub: def prefix = task.ext.prefix ?: "${meta.id}" - def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : "" + def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch" """ - echo "" ${compress_output} > "${prefix}.${output_file_extension}" + ${create_cmd} ${prefix}.${output_file_extension} """ } diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml index a5e55b10dd7f..df87ab069157 100644 --- a/modules/nf-core/bioawk/meta.yml +++ b/modules/nf-core/bioawk/meta.yml @@ -37,6 +37,23 @@ input: - edam: http://edamontology.org/format_1929 # FASTA - edam: http://edamontology.org/format_1930 # FASTQ - edam: http://edamontology.org/format_3475 # TSV + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't + wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" + ontologies: [] + - disable_redirect_output: + type: boolean + description: | + Disable the redirection of awk output to a given file. This is + useful if you want to use awk's built-in redirect to write files instead + of the shell's redirect. + - bioawk_extension: + type: string + description: | + The extension to use for the bioawk program. + enum: ["fastx", "bed", "vcf", "sam"] - output_file_extension: type: string description: The suffix to add to the output file name. diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index d85a20cb5111..0756fad4e2a3 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -14,7 +14,7 @@ nextflow_process { when { params { - bioawk_command = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'" + bioawk_command = "\'{print \">\" \$name ORS length(\$seq)}\'" bioawk_prefix = "sample_1" } @@ -24,7 +24,10 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = "fa.gz" + input[1] = [] + input[2] = false + input[3] = "fastx" + input[4] = "fa.gz" """ } } @@ -41,7 +44,7 @@ nextflow_process { when { params { - bioawk_command = "-c fastx \'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'" + bioawk_command = "\'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'" bioawk_prefix = "telomere_summary" } @@ -51,7 +54,10 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true) ] - input[1] = "tsv" + input[1] = [] + input[2] = false + input[3] = "fastx" + input[4] = "tsv" """ } } diff --git a/modules/nf-core/bioawk/tests/nextflow.config b/modules/nf-core/bioawk/tests/nextflow.config index 84a10f72ffc9..4e8bf47c0139 100644 --- a/modules/nf-core/bioawk/tests/nextflow.config +++ b/modules/nf-core/bioawk/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: BIOAWK { - ext.args = { "${params.bioawk_command}" } + ext.args2 = { "${params.bioawk_command}" } ext.prefix = { "${params.bioawk_prefix}" } } } From b6e1b146496c3e3b1fe021d3972a0481d39d8a90 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 26 Mar 2026 16:47:25 +0000 Subject: [PATCH 12/15] Missed a suffix --- modules/nf-core/bioawk/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 8781be1da1c1..6a8819be67f0 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -45,7 +45,7 @@ process BIOAWK { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch" + def create_cmd = output_file_extension.endsWith("gz") ? "echo '' | gzip >" : "touch" """ ${create_cmd} ${prefix}.${output_file_extension} """ From 2fe9865baa127ff3f1e50550f06f1c74a93974e6 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 26 Mar 2026 19:58:06 +0000 Subject: [PATCH 13/15] Update for programs and tests --- modules/nf-core/bioawk/main.nf | 4 +- modules/nf-core/bioawk/meta.yml | 3 +- modules/nf-core/bioawk/tests/main.nf.test | 60 +++++++++++- .../nf-core/bioawk/tests/main.nf.test.snap | 94 ++++++++++++++++++- modules/nf-core/bioawk/tests/nextflow.config | 2 +- 5 files changed, 153 insertions(+), 10 deletions(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 6a8819be67f0..52d296595fb5 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -24,8 +24,7 @@ process BIOAWK { script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given - program = program_file ? "-f ${program_file}" : "${args2}" + program = program_file ? "-f ${program_file}" : "${args}" awk_ext = bioawk_extension ? "-c ${bioawk_extension}" : "" def prefix = task.ext.prefix ?: "${meta.id}" output_cmd = output_file_extension.endsWith("gz") ? "| gzip > ${prefix}.${output_file_extension}" : "> ${prefix}.${output_file_extension}" @@ -37,7 +36,6 @@ process BIOAWK { """ bioawk \\ ${awk_ext} \\ - ${args} \\ ${program} \\ ${input} \\ ${output} diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml index df87ab069157..6c1fda39b6d5 100644 --- a/modules/nf-core/bioawk/meta.yml +++ b/modules/nf-core/bioawk/meta.yml @@ -42,7 +42,8 @@ input: description: Optional file containing logic for awk to execute. If you don't wish to use a file, you can use `ext.args2` to specify the logic. pattern: "*" - ontologies: [] + ontologies: + - edam: http://edamontology.org/data_3671 # TXT - disable_redirect_output: type: boolean description: | diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index 0756fad4e2a3..092e37b45868 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -44,7 +44,7 @@ nextflow_process { when { params { - bioawk_command = "\'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'" + bioawk_command = "\'BEGIN {OFS = \'\\t\'} {print toupper(\$seq), gsub(/G/, \"\", \$seq)} \'" bioawk_prefix = "telomere_summary" } @@ -70,4 +70,62 @@ nextflow_process { } } + test("fasta bioawk tsv - no redirect") { + + when { + params { + bioawk_prefix = "telomere_summary" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true) + ] + input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) > "output.tsv"}').collectFile(name:"program.awk") + input[2] = true + input[3] = "fastx" + input[4] = "tsv" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fasta bioawk tsv - program file") { + + when { + params { + bioawk_prefix = "telomere_summary" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true) + ] + input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) }').collectFile(name:"program.awk") + input[2] = false + input[3] = "fastx" + input[4] = "tsv" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + } diff --git a/modules/nf-core/bioawk/tests/main.nf.test.snap b/modules/nf-core/bioawk/tests/main.nf.test.snap index c02a080d7edd..e938fbfc318f 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test.snap +++ b/modules/nf-core/bioawk/tests/main.nf.test.snap @@ -1,4 +1,47 @@ { + "fasta bioawk tsv - no redirect": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "output.tsv:md5,45a26fc19df7fafe489fb66ee057b069" + ] + ], + "1": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "output.tsv:md5,45a26fc19df7fafe489fb66ee057b069" + ] + ], + "versions_bioawk": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-26T17:25:23.91735062" + }, "fasta bioawk fasta.gz": { "content": [ { @@ -40,7 +83,50 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-25T14:23:36.455899569" + "timestamp": "2026-03-26T19:57:03.10201788" + }, + "fasta bioawk tsv - program file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069" + ] + ], + "1": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069" + ] + ], + "versions_bioawk": [ + [ + "BIOAWK", + "bioawk", + "1.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-26T17:25:31.418710838" }, "fasta bioawk tsv": { "content": [ @@ -51,7 +137,7 @@ "id": "test", "single_end": false }, - "telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1" + "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069" ] ], "1": [ @@ -67,7 +153,7 @@ "id": "test", "single_end": false }, - "telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1" + "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069" ] ], "versions_bioawk": [ @@ -83,6 +169,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-26T10:13:18.610496559" + "timestamp": "2026-03-26T19:57:10.200126559" } } \ No newline at end of file diff --git a/modules/nf-core/bioawk/tests/nextflow.config b/modules/nf-core/bioawk/tests/nextflow.config index 4e8bf47c0139..84a10f72ffc9 100644 --- a/modules/nf-core/bioawk/tests/nextflow.config +++ b/modules/nf-core/bioawk/tests/nextflow.config @@ -1,6 +1,6 @@ process { withName: BIOAWK { - ext.args2 = { "${params.bioawk_command}" } + ext.args = { "${params.bioawk_command}" } ext.prefix = { "${params.bioawk_prefix}" } } } From 4c640760c2dffba113ffa1bfbcfb8ef86111e810 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 30 Mar 2026 12:21:12 +0100 Subject: [PATCH 14/15] Update to use args for -c --- modules/nf-core/bioawk/main.nf | 6 ++---- modules/nf-core/bioawk/meta.yml | 5 ----- modules/nf-core/bioawk/tests/main.nf.test | 16 ++++++++-------- modules/nf-core/bioawk/tests/nextflow.config | 3 ++- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index 52d296595fb5..db6f86606c9e 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -11,7 +11,6 @@ process BIOAWK { tuple val(meta), path(input) path(program_file) val(disable_redirect_output) - val bioawk_extension val output_file_extension output: @@ -24,8 +23,7 @@ process BIOAWK { script: def args = task.ext.args ?: '' - program = program_file ? "-f ${program_file}" : "${args}" - awk_ext = bioawk_extension ? "-c ${bioawk_extension}" : "" + program = program_file ? "-f ${program_file}" : "${args2}" def prefix = task.ext.prefix ?: "${meta.id}" output_cmd = output_file_extension.endsWith("gz") ? "| gzip > ${prefix}.${output_file_extension}" : "> ${prefix}.${output_file_extension}" output = disable_redirect_output ? "" : output_cmd @@ -35,7 +33,7 @@ process BIOAWK { def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : "" """ bioawk \\ - ${awk_ext} \\ + ${args} \\ ${program} \\ ${input} \\ ${output} diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml index 6c1fda39b6d5..7281e0b07073 100644 --- a/modules/nf-core/bioawk/meta.yml +++ b/modules/nf-core/bioawk/meta.yml @@ -50,11 +50,6 @@ input: Disable the redirection of awk output to a given file. This is useful if you want to use awk's built-in redirect to write files instead of the shell's redirect. - - bioawk_extension: - type: string - description: | - The extension to use for the bioawk program. - enum: ["fastx", "bed", "vcf", "sam"] - output_file_extension: type: string description: The suffix to add to the output file name. diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test index 092e37b45868..7ab40884cb01 100644 --- a/modules/nf-core/bioawk/tests/main.nf.test +++ b/modules/nf-core/bioawk/tests/main.nf.test @@ -14,6 +14,7 @@ nextflow_process { when { params { + bioawk_extension = "-c fastx" bioawk_command = "\'{print \">\" \$name ORS length(\$seq)}\'" bioawk_prefix = "sample_1" } @@ -26,8 +27,7 @@ nextflow_process { ] input[1] = [] input[2] = false - input[3] = "fastx" - input[4] = "fa.gz" + input[3] = "fa.gz" """ } } @@ -44,6 +44,7 @@ nextflow_process { when { params { + bioawk_extension = "-c fastx" bioawk_command = "\'BEGIN {OFS = \'\\t\'} {print toupper(\$seq), gsub(/G/, \"\", \$seq)} \'" bioawk_prefix = "telomere_summary" } @@ -56,8 +57,7 @@ nextflow_process { ] input[1] = [] input[2] = false - input[3] = "fastx" - input[4] = "tsv" + input[3] = "tsv" """ } } @@ -74,6 +74,7 @@ nextflow_process { when { params { + bioawk_extension = "-c fastx" bioawk_prefix = "telomere_summary" } @@ -85,8 +86,7 @@ nextflow_process { ] input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) > "output.tsv"}').collectFile(name:"program.awk") input[2] = true - input[3] = "fastx" - input[4] = "tsv" + input[3] = "tsv" """ } } @@ -103,6 +103,7 @@ nextflow_process { when { params { + bioawk_extension = "-c fastx" bioawk_prefix = "telomere_summary" } @@ -114,8 +115,7 @@ nextflow_process { ] input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) }').collectFile(name:"program.awk") input[2] = false - input[3] = "fastx" - input[4] = "tsv" + input[3] = "tsv" """ } } diff --git a/modules/nf-core/bioawk/tests/nextflow.config b/modules/nf-core/bioawk/tests/nextflow.config index 84a10f72ffc9..487101e1932e 100644 --- a/modules/nf-core/bioawk/tests/nextflow.config +++ b/modules/nf-core/bioawk/tests/nextflow.config @@ -1,6 +1,7 @@ process { withName: BIOAWK { - ext.args = { "${params.bioawk_command}" } + ext.args = { "${params.bioawk_extension}" } + ext.args2 = { "${params.bioawk_command}" } ext.prefix = { "${params.bioawk_prefix}" } } } From a944e1ab3392fda5eb77a3d51b251873e8eca1d5 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 30 Mar 2026 13:01:46 +0100 Subject: [PATCH 15/15] Didn't add in args2 again --- modules/nf-core/bioawk/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf index db6f86606c9e..4eaf195c1084 100644 --- a/modules/nf-core/bioawk/main.nf +++ b/modules/nf-core/bioawk/main.nf @@ -23,6 +23,7 @@ process BIOAWK { script: def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' program = program_file ? "-f ${program_file}" : "${args2}" def prefix = task.ext.prefix ?: "${meta.id}" output_cmd = output_file_extension.endsWith("gz") ? "| gzip > ${prefix}.${output_file_extension}" : "> ${prefix}.${output_file_extension}"