Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions modules/nf-core/tabix/tabix/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,40 @@ process TABIX_TABIX {
'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }"

input:
tuple val(meta), path(tab)
tuple val(meta), path(tab), path(tai), path(regions)

output:
tuple val(meta), path("*.{tbi,csi}"), emit: index
tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true
tuple val(meta), path("${prefix}.*gz"), emit: extracted, optional: true
tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def tab_suffix = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : ''
def regions_arg = regions ? "-R ${regions}" : ""
def output_arg = regions ? "| bgzip --threads ${task.cpus} > ${prefix}${tab_suffix}" : ""
"""
tabix \\
${regions_arg} \\
--threads $task.cpus \\
$args \\
$tab
$tab \\
${output_arg}

"""
stub:
def args = task.ext.args ?: ''
def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi"
prefix = task.ext.prefix ?: "${meta.id}"
def tab_suffix = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : ''
def ext = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi"
def index = regions ? "" : "touch ${tab}.${ext}"
def extracted = regions ? "echo | gzip > ${prefix}${tab_suffix}" : ""
"""
touch ${tab}.${index}
${index}
${extracted}
"""
}
41 changes: 36 additions & 5 deletions modules/nf-core/tabix/tabix/meta.yml
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
name: tabix_tabix
description: create tabix index from a sorted bgzip tab-delimited genome file
description: |
Create a tabix index from a sorted, bgzipped TAB-delimited genome file, or
extract regions from an already indexed bgzipped file (e.g. VCF, BED, GFF)
when a regions file is provided.
keywords:
- index
- tabix
- vcf
- extract
- regions
tools:
- tabix:
description: Generic indexer for TAB-delimited genome position files.
homepage: https://www.htslib.org/doc/tabix.html
documentation: https://www.htslib.org/doc/tabix.1.html
doi: 10.1093/bioinformatics/btq671
licence: ["MIT"]
licence:
- "MIT"
identifier: biotools:tabix
input:
- - meta:
Expand All @@ -23,6 +28,21 @@ input:
description: TAB-delimited genome position file compressed with bgzip
pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}"
ontologies: []
- tai:
type: file
description: |
Tabix index for the input file. Required when extracting regions.
Pass [] when creating an index instead.
pattern: "*.{tbi,csi}"
ontologies: []
- regions:
type: file
description: |
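Optional file of regions to extract, passed to `tabix -R`: either a BED file
or a TAB-delimited file with CHROM, POS and, optionally, POS_TO columns
(1-based, inclusive).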
Pass [] to create an index instead of extracting regions.
pattern: "*.{bed,txt,tsv}"
ontologies:
- edam: http://edamontology.org/format_3475
output:
index:
- - meta:
Expand All @@ -32,9 +52,22 @@ output:
e.g. [ id:'test', single_end:false ]
- "*.{tbi,csi}":
type: file
description: Tabix index file (either tbi or csi)
description: Tabix index file (tbi or csi). Emitted when no regions file
is provided.
pattern: "*.{tbi,csi}"
ontologies: []
extracted:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "${prefix}.*gz":
type: file
description: Bgzipped file of extracted regions, preserving the input file
extension. Emitted when a regions file is provided.
pattern: "*.*gz"
ontologies: []
versions_tabix:
- - ${task.process}:
type: string
Expand All @@ -45,7 +78,6 @@ output:
- tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+':
type: eval
description: The expression to obtain the version of the tool

topics:
versions:
- - ${task.process}:
Expand All @@ -57,7 +89,6 @@ topics:
- tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+':
type: eval
description: The expression to obtain the version of the tool

authors:
- "@joseespinosa"
- "@drpatelh"
Expand Down
75 changes: 70 additions & 5 deletions modules/nf-core/tabix/tabix/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ nextflow_process {
"""
input[0] = [
[ id:'tbi_bed' ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ]
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true),
[],
[]
]
"""
}
Expand All @@ -43,7 +45,9 @@ nextflow_process {
"""
input[0] = [
[ id:'tbi_gff' ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ]
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
[],
[]
]
"""
}
Expand All @@ -67,7 +71,9 @@ nextflow_process {
"""
input[0] = [
[ id:'tbi_vcf' ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
[],
[]
]
"""
}
Expand All @@ -91,7 +97,9 @@ nextflow_process {
"""
input[0] = [
[ id:'vcf_csi' ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
[],
[]
]
"""
}
Expand All @@ -106,6 +114,61 @@ nextflow_process {

}

// Region-extraction mode: the input tuple carries a bgzipped VCF, its .tbi index
// and a regions file, so the `extracted` output is the one under test.
test("sarscov2_vcf_tbi_regions") {
when {
params {
// tabix `-h` also prints the header lines of the input.
// NOTE(review): presumably forwarded to the process as ext.args by the
// test's nextflow.config, which is not visible here — confirm.
module_args = '-h'
}
// NOTE(review): the regions file below is fetched from a branch of a personal
// fork (luisas/test-datasets) instead of params.modules_testdata_base_path;
// the test breaks if that branch is deleted or rebased — confirm the file is
// available upstream before merging.
process {
"""
input[0] = [
[ id:'vcf_regions' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true),
file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true)
]
"""
}
}

then {
assertAll (
{ assert process.success },
// Snapshot three things: the index channel (presumably empty in this mode —
// verify against the snapshot file), the result of getVariantsMD5() on the
// extracted VCF, and the reported tabix version.
{ assert snapshot(
process.out.index,
path(process.out.extracted[0][1]).vcf.getVariantsMD5(),
process.out.versions_tabix
).match() }
)
}
}

// Stub-mode counterpart of the region-extraction test: same input tuple
// (VCF, .tbi index, regions file) but run with `-stub`.
test("sarscov2_vcf_tbi_regions_stub") {
options "-stub"
when {
params {
// No extra tabix arguments for the stub run.
module_args = ''
}
// NOTE(review): the regions file below is fetched from a branch of a personal
// fork (luisas/test-datasets) instead of params.modules_testdata_base_path;
// the test breaks if that branch is deleted or rebased — confirm the file is
// available upstream before merging.
process {
"""
input[0] = [
[ id:'vcf_regions_stub' ],
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true),
file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true)
]
"""
}
}

then {
assertAll (
{ assert process.success },
// Snapshot every output channel after passing it through sanitizeOutput().
{ assert snapshot(sanitizeOutput(process.out)).match() }
)
}
}

test("sarscov2_vcf_csi_stub") {
options "-stub"
when {
Expand All @@ -116,7 +179,9 @@ nextflow_process {
"""
input[0] = [
[ id:'vcf_csi_stub' ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
[],
[]
]
"""
}
Expand Down
Loading
Loading