diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index c8bcef64eeef..36e64c318f1a 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -8,10 +8,11 @@ process TABIX_TABIX { 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" input: - tuple val(meta), path(tab) + tuple val(meta), path(tab), path(tai), path(regions) output: - tuple val(meta), path("*.{tbi,csi}"), emit: index + tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true + tuple val(meta), path("${prefix}.*gz"), emit: extracted, optional: true tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix when: @@ -19,17 +20,35 @@ process TABIX_TABIX { script: def args = task.ext.args ?: '' + // No `def`: the output glob "${prefix}.*gz" has to be able to read this variable. + prefix = task.ext.prefix ?: "${meta.id}" + // Everything from the first '.' of the input name (e.g. '.vcf.gz'), so the extracted file keeps the input's extension. + def tab_suffix = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : '' + // With a regions file the matching records are piped through bgzip; without one tabix only builds the index. + def regions_arg = regions ? "-R ${regions}" : "" + def output_arg = regions ? "| bgzip --threads ${task.cpus} > ${prefix}${tab_suffix}" : "" + // Fail early: if the output name equals the staged input name, the shell redirect would overwrite the input. + if (regions && "${tab}" == "${prefix}${tab_suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ tabix \\ + ${regions_arg} \\ --threads $task.cpus \\ $args \\ - $tab + $tab \\ + ${output_arg} """ stub: def args = task.ext.args ?: '' - def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" + prefix = task.ext.prefix ?: "${meta.id}" + def tab_suffix = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : '' + // Same name-collision guard as the script block, so stub runs fail the same way. + if (regions && "${tab}" == "${prefix}${tab_suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + def ext = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" + def index = regions ? "" : "touch ${tab}.${ext}" + def extracted = regions ? 
"echo | gzip > ${prefix}${tab_suffix}" : "" """ - touch ${tab}.${index} + ${index} + ${extracted} """ } diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index f5b6b3c1604d..5c32b22a26bc 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -1,16 +1,21 @@ name: tabix_tabix -description: create tabix index from a sorted bgzip tab-delimited genome file +description: | + Create a tabix index from a sorted bgzip TAB-delimited genome file, or + extract regions from a bgzipped, tabix-indexed file using an optional regions file. keywords: - index - tabix - vcf + - extract + - regions tools: - tabix: description: Generic indexer for TAB-delimited genome position files. homepage: https://www.htslib.org/doc/tabix.html documentation: https://www.htslib.org/doc/tabix.1.html doi: 10.1093/bioinformatics/btq671 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:tabix input: - - meta: @@ -23,6 +28,21 @@ input: description: TAB-delimited genome position file compressed with bgzip pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" ontologies: [] + - tai: + type: file + description: | + Tabix index for the input file. Required when extracting regions. + Pass [] when creating an index instead. + pattern: "*.{tbi,csi}" + ontologies: [] + - regions: + type: file + description: | + Optional file of regions to extract: a BED file (must be named *.bed) or a TAB-delimited file with CHROM, POS and, optionally, POS_TO columns. + Pass [] to create an index instead of extracting regions. + pattern: "*.{bed,txt,tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 output: index: - - meta: @@ -32,9 +52,22 @@ output: e.g. [ id:'test', single_end:false ] - "*.{tbi,csi}": type: file - description: Tabix index file (either tbi or csi) + description: Tabix index file (tbi or csi). Emitted when no regions file + is provided. pattern: "*.{tbi,csi}" ontologies: [] + extracted: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "${prefix}.*gz": + type: file + description: Bgzipped file of extracted regions, preserving the input file + extension. Emitted when a regions file is provided. + pattern: "*.*gz" + ontologies: [] versions_tabix: - - ${task.process}: type: string @@ -45,7 +78,6 @@ output: - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': type: eval description: The expression to obtain the version of the tool - topics: versions: - - ${task.process}: @@ -57,7 +89,6 @@ topics: - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': type: eval description: The expression to obtain the version of the tool - authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 19eefab87faf..280eac25959d 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -20,7 +20,9 @@ nextflow_process { """ input[0] = [ [ id:'tbi_bed' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true), + [], + [] ] """ } @@ -43,7 +45,9 @@ nextflow_process { """ input[0] = [ [ id:'tbi_gff' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + [], + [] ] """ } @@ -67,7 +71,9 @@ nextflow_process { """ input[0] = [ [ id:'tbi_vcf' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] ] """ } @@ -91,7 +97,9 @@ nextflow_process { """ input[0] = [ [ id:'vcf_csi' ], - [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] ] """ } @@ -106,6 +114,61 @@ nextflow_process { } + test("sarscov2_vcf_tbi_regions") { + when { + params { + module_args = '-h' + } + process { + """ + input[0] = [ + [ id:'vcf_regions' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.index, + path(process.out.extracted[0][1]).vcf.getVariantsMD5(), + process.out.versions_tabix + ).match() } + ) + } + } + + test("sarscov2_vcf_tbi_regions_stub") { + options "-stub" + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'vcf_regions_stub' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } + test("sarscov2_vcf_csi_stub") { options "-stub" when { @@ -116,7 +179,9 @@ nextflow_process { """ input[0] = [ [ id:'vcf_csi_stub' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] ] """ } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 91a3a66de0e4..27e977e0f81e 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -1,4 +1,33 @@ { + "sarscov2_vcf_tbi_regions_stub": { + "content": [ + { + "extracted": [ + [ + { + "id": "vcf_regions_stub" + }, + "vcf_regions_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "index": [ + + ], + "versions_tabix": [ + [ + "TABIX_TABIX", + "tabix", + "1.21" + ] + ] + } + ], + "timestamp": "2026-04-01T15:11:16.940666", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, "sarscov2_gff_tbi": { "content": [ { @@ -11,11 +40,17 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -34,11 +69,31 @@ ] } ], + "timestamp": "2026-04-01T15:10:57.509986", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2_vcf_tbi_regions": { + "content": [ + [ + + ], + "c57539b0a39bee90f9dc6f4c60b268ed", + [ + [ + "TABIX_TABIX", + "tabix", + "1.21" + ] + ] + ], + "timestamp": "2026-03-31T14:38:35.804865", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:34.055936" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_bedgz_tbi": { "content": [ @@ -52,11 +107,17 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -75,11 +136,11 @@ ] } ], + "timestamp": "2026-04-01T15:10:52.971222", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:29.90469" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_vcf_tbi": { "content": [ @@ -93,11 +154,17 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", "1.21" ] + ], + 
"extracted": [ + ], "index": [ [ @@ -116,11 +183,11 @@ ] } ], + "timestamp": "2026-04-01T15:11:01.818669", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:38.044307" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_vcf_csi_stub": { "content": [ @@ -134,11 +201,17 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -157,11 +230,11 @@ ] } ], + "timestamp": "2026-04-01T15:11:22.403222", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-10T14:31:29.90297082" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_vcf_csi": { "content": [ @@ -175,11 +248,17 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -198,10 +277,10 @@ ] } ], + "timestamp": "2026-04-01T15:11:05.735059", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:42.013054" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf index f265df0fecf8..3ee3fb26c75d 100644 --- a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf @@ -166,7 +166,7 @@ workflow VCF_ANNOTATE_ENSEMBLVEP_SNPEFF { return [meta, vcf, []] } - TABIX_TABIX(ch_tabix_input.bgzip) + TABIX_TABIX(ch_tabix_input.bgzip.map { meta, vcf -> [meta, vcf, [], []] }) def ch_vcf_tbi = ch_tabix_input.bgzip .join(TABIX_TABIX.out.index, failOnDuplicate: true, failOnMismatch: true) diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf index 758b7a61a847..4b4f593c8034 100644 --- a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf 
@@ -22,7 +22,7 @@ workflow VCF_EXTRACT_RELATE_SOMALIER { } TABIX_TABIX( - ch_input.no_tbi + ch_input.no_tbi.map { meta, vcf -> [meta, vcf, [], []] } ) ch_somalierextract_input = ch_input.no_tbi diff --git a/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/subworkflows/nf-core/vcf_gather_bcftools/main.nf index 0fa54ae806a4..0d0df166b65e 100644 --- a/subworkflows/nf-core/vcf_gather_bcftools/main.nf +++ b/subworkflows/nf-core/vcf_gather_bcftools/main.nf @@ -54,7 +54,7 @@ workflow VCF_GATHER_BCFTOOLS { ch_tabix_input = ch_vcf_concat } - TABIX_TABIX(ch_tabix_input) + TABIX_TABIX(ch_tabix_input.map { meta, vcf -> [meta, vcf, [], []] }) ch_vcf_index = ch_tabix_input .join(TABIX_TABIX.out.index)