diff --git a/modules/nf-core/bioawk/main.nf b/modules/nf-core/bioawk/main.nf
index 893dfb62645..4eaf195c108 100644
--- a/modules/nf-core/bioawk/main.nf
+++ b/modules/nf-core/bioawk/main.nf
@@ -9,6 +9,8 @@ process BIOAWK {
 
     input:
     tuple val(meta), path(input)
+    path(program_file)
+    val(disable_redirect_output)
     val output_file_extension
 
     output:
@@ -21,21 +23,29 @@ process BIOAWK {
 
     script:
     def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    // Use `-f <program_file>` when a program file is supplied; otherwise fall back to the inline program in ext.args2.
+    def program = program_file ? "-f ${program_file}" : "${args2}"
     def prefix = task.ext.prefix ?: "${meta.id}"
+    // Shell redirect for bioawk's stdout (piped through gzip when the extension ends in "gz").
+    // Left empty when `disable_redirect_output` is set, so the awk program can write its own files.
+    def output_cmd = output_file_extension.endsWith("gz") ? "| gzip > ${prefix}.${output_file_extension}" : "> ${prefix}.${output_file_extension}"
+    def redirect = disable_redirect_output ? "" : output_cmd
+
     if ("${input}" == "${prefix}.${output_file_extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate."
 
-    def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : ""
     """
     bioawk \\
-        $args \\
-        $input \\
-        ${compress_output} > ${prefix}.${output_file_extension}
+        ${args} \\
+        ${program} \\
+        ${input} \\
+        ${redirect}
     """
 
     stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : ""
+    def prefix     = task.ext.prefix ?: "${meta.id}"
+    def create_cmd = output_file_extension.endsWith("gz") ? "echo '' | gzip >" : "touch"
     """
-    echo "" ${compress_output} > "${prefix}.${output_file_extension}"
+    ${create_cmd} ${prefix}.${output_file_extension}
     """
 }
diff --git a/modules/nf-core/bioawk/meta.yml b/modules/nf-core/bioawk/meta.yml
index a5e55b10dd7..7281e0b0707 100644
--- a/modules/nf-core/bioawk/meta.yml
+++ b/modules/nf-core/bioawk/meta.yml
@@ -37,6 +37,19 @@ input:
           - edam: http://edamontology.org/format_1929 # FASTA
           - edam: http://edamontology.org/format_1930 # FASTQ
           - edam: http://edamontology.org/format_3475 # TSV
+  - program_file:
+      type: file
+      description: Optional file containing logic for awk to execute. If you don't
+        wish to use a file, you can use `ext.args2` to specify the logic.
+      pattern: "*"
+      ontologies:
+        - edam: http://edamontology.org/data_3671 # TXT
+  - disable_redirect_output:
+      type: boolean
+      description: |
+        Disable the redirection of awk output to a given file. This is
+        useful if you want to use awk's built-in redirect to write files instead
+        of the shell's redirect.
   - output_file_extension:
       type: string
       description: The suffix to add to the output file name.
diff --git a/modules/nf-core/bioawk/tests/main.nf.test b/modules/nf-core/bioawk/tests/main.nf.test
index d85a20cb511..7ab40884cb0 100644
--- a/modules/nf-core/bioawk/tests/main.nf.test
+++ b/modules/nf-core/bioawk/tests/main.nf.test
@@ -14,7 +14,8 @@ nextflow_process {
 
         when {
             params {
-                bioawk_command = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'"
+                bioawk_extension = "-c fastx"
+                bioawk_command = "\'{print \">\" \$name ORS length(\$seq)}\'"
                 bioawk_prefix = "sample_1"
             }
 
@@ -24,7 +25,9 @@ nextflow_process {
                     [ id:'test', single_end:false ], // meta map
                     file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
                 ]
-                input[1] = "fa.gz"
+                input[1] = []
+                input[2] = false
+                input[3] = "fa.gz"
                 """
             }
         }
@@ -41,7 +44,8 @@ nextflow_process {
 
         when {
             params {
-                bioawk_command = "-c fastx \'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'"
+                bioawk_extension = "-c fastx"
+                bioawk_command = "\'BEGIN {OFS = \'\\t\'} {print toupper(\$seq), gsub(/G/, \"\", \$seq)} \'"
                 bioawk_prefix = "telomere_summary"
             }
 
@@ -51,7 +55,67 @@ nextflow_process {
                     [ id:'test', single_end:false ], // meta map
                     file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true)
                 ]
-                input[1] = "tsv"
+                input[1] = []
+                input[2] = false
+                input[3] = "tsv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("fasta bioawk tsv - no redirect") {
+
+        when {
+            params {
+                bioawk_extension = "-c fastx"
+                bioawk_prefix = "telomere_summary"
+            }
+
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true)
+                ]
+                input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) > "output.tsv"}').collectFile(name:"program.awk")
+                input[2] = true
+                input[3] = "tsv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("fasta bioawk tsv - program file") {
+
+        when {
+            params {
+                bioawk_extension = "-c fastx"
+                bioawk_prefix = "telomere_summary"
+            }
+
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true)
+                ]
+                input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) }').collectFile(name:"program.awk")
+                input[2] = false
+                input[3] = "tsv"
                 """
             }
         }
diff --git a/modules/nf-core/bioawk/tests/main.nf.test.snap b/modules/nf-core/bioawk/tests/main.nf.test.snap
index c02a080d7ed..e938fbfc318 100644
--- a/modules/nf-core/bioawk/tests/main.nf.test.snap
+++ b/modules/nf-core/bioawk/tests/main.nf.test.snap
@@ -1,4 +1,47 @@
 {
+    "fasta bioawk tsv - no redirect": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "output.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
+                    ]
+                ],
+                "1": [
+                    [
+                        "BIOAWK",
+                        "bioawk",
+                        "1.0"
+                    ]
+                ],
+                "output": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "output.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
+                    ]
+                ],
+                "versions_bioawk": [
+                    [
+                        "BIOAWK",
+                        "bioawk",
+                        "1.0"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
+        },
+        "timestamp": "2026-03-26T17:25:23.91735062"
+    },
     "fasta bioawk fasta.gz": {
         "content": [
             {
@@ -40,7 +83,50 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.2"
         },
-        "timestamp": "2026-03-25T14:23:36.455899569"
+        "timestamp": "2026-03-26T19:57:03.10201788"
+    },
+    "fasta bioawk tsv - program file": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
+                    ]
+                ],
+                "1": [
+                    [
+                        "BIOAWK",
+                        "bioawk",
+                        "1.0"
+                    ]
+                ],
+                "output": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
+                    ]
+                ],
+                "versions_bioawk": [
+                    [
+                        "BIOAWK",
+                        "bioawk",
+                        "1.0"
+                    ]
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.3",
+            "nextflow": "25.10.2"
+        },
+        "timestamp": "2026-03-26T17:25:31.418710838"
     },
     "fasta bioawk tsv": {
         "content": [
@@ -51,7 +137,7 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1"
+                        "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
                     ]
                 ],
                 "1": [
@@ -67,7 +153,7 @@
                             "id": "test",
                             "single_end": false
                         },
-                        "telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1"
+                        "telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
                     ]
                 ],
                 "versions_bioawk": [
@@ -83,6 +169,6 @@
             "nf-test": "0.9.3",
             "nextflow": "25.10.2"
         },
-        "timestamp": "2026-03-26T10:13:18.610496559"
+        "timestamp": "2026-03-26T19:57:10.200126559"
     }
 }
\ No newline at end of file
diff --git a/modules/nf-core/bioawk/tests/nextflow.config b/modules/nf-core/bioawk/tests/nextflow.config
index 84a10f72ffc..487101e1932 100644
--- a/modules/nf-core/bioawk/tests/nextflow.config
+++ b/modules/nf-core/bioawk/tests/nextflow.config
@@ -1,6 +1,7 @@
 process {
     withName: BIOAWK {
-        ext.args = { "${params.bioawk_command}" }
+        ext.args = { "${params.bioawk_extension}" }
+        ext.args2 = { "${params.bioawk_command}" }
         ext.prefix = { "${params.bioawk_prefix}" }
     }
 }