Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions modules/nf-core/bioawk/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ process BIOAWK {

input:
tuple val(meta), path(input)
path(program_file)
val(disable_redirect_output)
val output_file_extension

output:
Expand All @@ -21,21 +23,27 @@ process BIOAWK {

script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
program = program_file ? "-f ${program_file}" : "${args2}"
def prefix = task.ext.prefix ?: "${meta.id}"
output_cmd = output_file_extension.endsWith("gz") ? "| gzip > ${prefix}.${output_file_extension}" : "> ${prefix}.${output_file_extension}"
output = disable_redirect_output ? "" : output_cmd

if ("${input}" == "${prefix}.${output_file_extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate."

def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : ""
"""
bioawk \\
$args \\
$input \\
${compress_output} > ${prefix}.${output_file_extension}
${args} \\
${program} \\
${input} \\
${output}
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
def compress_output = output_file_extension.endsWith(".gz") ? " | gzip " : ""
def prefix = task.ext.prefix ?: "${meta.id}"
def create_cmd = output_file_extension.endsWith("gz") ? "echo '' | gzip >" : "touch"
"""
echo "" ${compress_output} > "${prefix}.${output_file_extension}"
${create_cmd} ${prefix}.${output_file_extension}
"""
}
13 changes: 13 additions & 0 deletions modules/nf-core/bioawk/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,19 @@ input:
- edam: http://edamontology.org/format_1929 # FASTA
- edam: http://edamontology.org/format_1930 # FASTQ
- edam: http://edamontology.org/format_3475 # TSV
- program_file:
type: file
description: Optional file containing the awk program to execute (passed to
bioawk with `-f`). If no file is supplied, the program is taken from
`ext.args2` instead.
pattern: "*"
ontologies:
- edam: http://edamontology.org/data_3671 # TXT
- disable_redirect_output:
type: boolean
description: |
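Disable the shell redirection of bioawk's standard output to
`<prefix>.<output_file_extension>`. This is useful when the awk program
writes its own files with awk's built-in redirect (e.g. `print ... > "output.tsv"`)
instead of relying on the shell's redirect.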
- output_file_extension:
type: string
description: The suffix to add to the output file name.
Expand Down
72 changes: 68 additions & 4 deletions modules/nf-core/bioawk/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ nextflow_process {

when {
params {
bioawk_command = "-c fastx \'{print \">\" \$name ORS length(\$seq)}\'"
bioawk_extension = "-c fastx"
bioawk_command = "\'{print \">\" \$name ORS length(\$seq)}\'"
bioawk_prefix = "sample_1"
}

Expand All @@ -24,7 +25,9 @@ nextflow_process {
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
input[1] = "fa.gz"
input[1] = []
input[2] = false
input[3] = "fa.gz"
"""
}
}
Expand All @@ -41,7 +44,8 @@ nextflow_process {

when {
params {
bioawk_command = "-c fastx \'{s = toupper(\$seq); g = gsub(/G/, \"\", s); printf \"%s\\t%d\\n\", s, g}\'"
bioawk_extension = "-c fastx"
bioawk_command = "\'BEGIN {OFS = \'\\t\'} {print toupper(\$seq), gsub(/G/, \"\", \$seq)} \'"
bioawk_prefix = "telomere_summary"
}

Expand All @@ -51,7 +55,67 @@ nextflow_process {
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true)
]
input[1] = "tsv"
input[1] = []
input[2] = false
input[3] = "tsv"
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

// Exercises BIOAWK with the module's shell redirect disabled (input[2] = true).
// The awk program is supplied as a file (input[1], collected into "program.awk")
// and writes its result itself via awk's own `>` redirect to "output.tsv".
test("fasta bioawk tsv - no redirect") {

when {
// Only the bioawk options and the prefix are set; no `bioawk_command` is
// defined here because the program comes from the file passed as input[1].
params {
bioawk_extension = "-c fastx"
bioawk_prefix = "telomere_summary"
}

// Inputs, in order: [meta, input file], program file, disable-redirect flag,
// output file extension.
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true)
]
input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) > "output.tsv"}').collectFile(name:"program.awk")
input[2] = true
input[3] = "tsv"
"""
}
}

// Passes when the process succeeds and its emitted channels match the stored snapshot.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}
}

test("fasta bioawk tsv - program file") {

when {
params {
bioawk_extension = "-c fastx"
bioawk_prefix = "telomere_summary"
}

process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'generic/telomere/telomeric_motifs.fasta', checkIfExists: true)
]
input[1] = channel.of('BEGIN {OFS = "\t"} {print toupper(\$seq), gsub(/G/, "", \$seq) }').collectFile(name:"program.awk")
input[2] = false
input[3] = "tsv"
"""
}
}
Expand Down
94 changes: 90 additions & 4 deletions modules/nf-core/bioawk/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,47 @@
{
"fasta bioawk tsv - no redirect": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"output.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
]
],
"1": [
[
"BIOAWK",
"bioawk",
"1.0"
]
],
"output": [
[
{
"id": "test",
"single_end": false
},
"output.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
]
],
"versions_bioawk": [
[
"BIOAWK",
"bioawk",
"1.0"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2026-03-26T17:25:23.91735062"
},
"fasta bioawk fasta.gz": {
"content": [
{
Expand Down Expand Up @@ -40,7 +83,50 @@
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2026-03-25T14:23:36.455899569"
"timestamp": "2026-03-26T19:57:03.10201788"
},
"fasta bioawk tsv - program file": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
]
],
"1": [
[
"BIOAWK",
"bioawk",
"1.0"
]
],
"output": [
[
{
"id": "test",
"single_end": false
},
"telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
]
],
"versions_bioawk": [
[
"BIOAWK",
"bioawk",
"1.0"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2026-03-26T17:25:31.418710838"
},
"fasta bioawk tsv": {
"content": [
Expand All @@ -51,7 +137,7 @@
"id": "test",
"single_end": false
},
"telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1"
"telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
]
],
"1": [
Expand All @@ -67,7 +153,7 @@
"id": "test",
"single_end": false
},
"telomere_summary.tsv:md5,1d6c6398a229b5005b612f0a393ae6a1"
"telomere_summary.tsv:md5,45a26fc19df7fafe489fb66ee057b069"
]
],
"versions_bioawk": [
Expand All @@ -83,6 +169,6 @@
"nf-test": "0.9.3",
"nextflow": "25.10.2"
},
"timestamp": "2026-03-26T10:13:18.610496559"
"timestamp": "2026-03-26T19:57:10.200126559"
}
}
3 changes: 2 additions & 1 deletion modules/nf-core/bioawk/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
process {
withName: BIOAWK {
ext.args = { "${params.bioawk_command}" }
ext.args = { "${params.bioawk_extension}" }
ext.args2 = { "${params.bioawk_command}" }
ext.prefix = { "${params.bioawk_prefix}" }
}
}
Loading