nf-core · luisas · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026
diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf
@@ -8,28 +8,51 @@ process TABIX_TABIX {
         'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }"
 
     input:
-    tuple val(meta), path(tab)
+    // tai and regions are optional: pass [] for both to build an index instead of extracting regions
+    tuple val(meta),  path(tab), path(tai), path(regions)
 
     output:
-    tuple val(meta), path("*.{tbi,csi}"), emit: index
+    // both channels are optional: an index is written without a regions file, an extracted file with one
+    tuple val(meta), path("*.{tbi,csi}"),         emit: index,     optional: true
+    tuple val(meta), path("${prefix}.*gz"),       emit: extracted, optional: true
     tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'")   , topic: versions   , emit: versions_tabix
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
+    prefix          = task.ext.prefix ?: "${meta.id}"
+    // everything from the first dot of the input name (e.g. ".vcf.gz") is reused as the output extension
+    def tab_suffix  = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : ''
+    // the extracted file is written with a shell redirect, which would overwrite the input if both names match
+    if (regions && "${tab}" == "${prefix}${tab_suffix}") {
+        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
+    }
+    def regions_arg = regions ? "-R ${regions}" : ""
+    def output_arg  = regions ? "| bgzip --threads ${task.cpus} > ${prefix}${tab_suffix}" : ""
     """
     tabix \\
+        ${regions_arg} \\
         --threads $task.cpus \\
         $args \\
-        $tab
+        $tab \\
+        ${output_arg}
 
     """
     stub:
     def args = task.ext.args ?: ''
-    def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi"
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def tab_suffix = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : ''
+    // same guard as the script section, so stub runs fail on the same name collision
+    if (regions && "${tab}" == "${prefix}${tab_suffix}") {
+        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
+    }
+    def ext = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi"
+    def index     = regions ? "" : "touch ${tab}.${ext}"
+    def extracted = regions ? "echo | gzip > ${prefix}${tab_suffix}" : ""
     """
-    touch ${tab}.${index}
+    ${index}
+    ${extracted}
     """
 }
diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml
@@ -1,16 +1,21 @@
 name: tabix_tabix
-description: create tabix index from a sorted bgzip tab-delimited genome file
+description: |
+  Create a tabix index from a sorted bgzip TAB-delimited genome file, or
+  extract regions from a bgzipped, tabix-indexed file using an optional regions file.
 keywords:
   - index
   - tabix
   - vcf
+  - extract
+  - regions
 tools:
   - tabix:
       description: Generic indexer for TAB-delimited genome position files.
       homepage: https://www.htslib.org/doc/tabix.html
       documentation: https://www.htslib.org/doc/tabix.1.html
       doi: 10.1093/bioinformatics/btq671
-      licence: ["MIT"]
+      licence:
+        - "MIT"
       identifier: biotools:tabix
 input:
   - - meta:
@@ -23,6 +28,21 @@ input:
         description: TAB-delimited genome position file compressed with bgzip
         pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}"
         ontologies: []
+    - tai:
+        type: file
+        description: |
+          Tabix index for the input file. Required when extracting regions.
+          Pass [] when creating an index instead.
+        pattern: "*.{tbi,csi}"
+        ontologies: []
+    - regions:
+        type: file
+        description: |
+          Optional file of regions to extract (BED, or TAB-delimited CHROM, POS and optional POS_TO columns).
+          Pass [] to create an index instead of extracting regions.
+        pattern: "*.{bed,txt,tsv}"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
 output:
   index:
     - - meta:
@@ -32,9 +52,22 @@ output:
             e.g. [ id:'test', single_end:false ]
       - "*.{tbi,csi}":
           type: file
-          description: Tabix index file (either tbi or csi)
+          description: Tabix index file (tbi or csi). Emitted when no regions file
+            is provided.
           pattern: "*.{tbi,csi}"
           ontologies: []
+  extracted:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "${prefix}.*gz":
+          type: file
+          description: Bgzipped file of extracted regions, preserving the input file
+            extension. Emitted when a regions file is provided.
+          pattern: "*.*gz"
+          ontologies: []
   versions_tabix:
     - - ${task.process}:
           type: string
@@ -45,7 +78,6 @@ output:
       - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+':
           type: eval
           description: The expression to obtain the version of the tool
-
 topics:
   versions:
     - - ${task.process}:
@@ -57,7 +89,6 @@ topics:
       - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+':
           type: eval
           description: The expression to obtain the version of the tool
-
 authors:
   - "@joseespinosa"
   - "@drpatelh"

diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test
@@ -20,7 +20,9 @@ nextflow_process {
                 """
                 input[0] = [
                                 [ id:'tbi_bed' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ]
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true),
+                                [],
+                                []
                             ]
                 """
             }
@@ -43,7 +45,9 @@ nextflow_process {
                 """
                 input[0] = [
                                 [ id:'tbi_gff' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ]
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+                                [],
+                                []
                             ]
                 """
             }
@@ -67,7 +71,9 @@ nextflow_process {
                 """
                 input[0] =  [
                                 [ id:'tbi_vcf' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+                                [],
+                                []
                             ]
                 """
             }
@@ -91,7 +97,9 @@ nextflow_process {
                 """
                 input[0] =  [
                                 [ id:'vcf_csi' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+                                [],
+                                []
                             ]
                 """
             }
@@ -106,6 +114,61 @@ nextflow_process {
 
     }
 
+    test("sarscov2_vcf_tbi_regions") {
+        when {
+            params {
+                module_args = '-h'
+            }
+            process {
+                """
+                input[0] = [
+                                [ id:'vcf_regions' ],
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true),
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) // NOTE(review): requires this file to be merged into nf-core/test-datasets
+                            ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                    process.out.index,
+                    path(process.out.extracted[0][1]).vcf.getVariantsMD5(),
+                    process.out.versions_tabix
+                ).match() }
+            )
+        }
+    }
+
+    test("sarscov2_vcf_tbi_regions_stub") {
+        options "-stub"
+        when {
+            params {
+                module_args = ''
+            }
+            process {
+                """
+                input[0] = [
+                                [ id:'vcf_regions_stub' ],
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true),
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) // NOTE(review): requires this file to be merged into nf-core/test-datasets
+                            ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+    }
+
     test("sarscov2_vcf_csi_stub") {
         options "-stub"
         when {
@@ -116,7 +179,9 @@ nextflow_process {
                 """
                 input[0] =  [
                                 [ id:'vcf_csi_stub' ],
-                                [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
+                                file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+                                [],
+                                []
                             ]
                 """
             }