From 283cabba2f3352225a31f445dd9b1eff45dd9b46 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 09:47:03 +0100 Subject: [PATCH 01/21] tabix/tabix: update module to support region-based VCF extraction --- modules/nf-core/tabix/tabix/main.nf | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index c8bcef64eeef..3593ea16ee94 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -8,28 +8,31 @@ process TABIX_TABIX { 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" input: - tuple val(meta), path(tab) + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) output: - tuple val(meta), path("*.{tbi,csi}"), emit: index + tuple val(meta), path("${prefix}.vcf"), emit: vcf tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions_arg = regions ? "-R ${regions}" : "" """ tabix \\ - --threads $task.cpus \\ - $args \\ - $tab + ${regions_arg} \\ + ${args} \\ + ${vcf} \\ + > ${prefix}.vcf """ stub: - def args = task.ext.args ?: '' - def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" + prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${tab}.${index} + touch ${prefix}.vcf """ } From d0071a86fc28d66fb94f7fea65cdf6452d2f7f86 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 10:04:35 +0100 Subject: [PATCH 02/21] tabix/extract: add new module for region-based VCF extraction - Reverts tabix/tabix to its original indexing behaviour - New tabix/extract module: takes a bgzipped VCF + tbi + optional regions file - Outputs bgzipped VCF (always) and tbi index (optional, controlled by create_index input) - Follows samtools/view pattern for optional outputs --- modules/nf-core/tabix/extract/environment.yml | 8 ++ modules/nf-core/tabix/extract/main.nf | 45 ++++++++ modules/nf-core/tabix/extract/meta.yml | 107 ++++++++++++++++++ .../nf-core/tabix/extract/tests/main.nf.test | 86 ++++++++++++++ .../tabix/extract/tests/nextflow.config | 6 + modules/nf-core/tabix/tabix/main.nf | 21 ++-- 6 files changed, 261 insertions(+), 12 deletions(-) create mode 100644 modules/nf-core/tabix/extract/environment.yml create mode 100644 modules/nf-core/tabix/extract/main.nf create mode 100644 modules/nf-core/tabix/extract/meta.yml create mode 100644 modules/nf-core/tabix/extract/tests/main.nf.test create mode 100644 modules/nf-core/tabix/extract/tests/nextflow.config diff --git a/modules/nf-core/tabix/extract/environment.yml b/modules/nf-core/tabix/extract/environment.yml new file mode 100644 index 000000000000..771b13870781 --- /dev/null +++ b/modules/nf-core/tabix/extract/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.21 diff --git a/modules/nf-core/tabix/extract/main.nf b/modules/nf-core/tabix/extract/main.nf new file mode 100644 index 000000000000..776f77dfc92a --- /dev/null +++ b/modules/nf-core/tabix/extract/main.nf @@ -0,0 +1,45 @@ +process TABIX_EXTRACT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92859404d861ae01afb87e2b789aebc71c0ab546397af890c7df74e4ee22c8dd/data' : + 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + val create_index + + output: + tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true + tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'"), topic: versions, emit: versions_tabix + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions_arg = regions ? "-R ${regions}" : "" + def index_cmd = create_index ? "tabix ${prefix}.vcf.gz" : "" + """ + tabix \\ + ${regions_arg} \\ + ${args} \\ + ${vcf} \\ + | bgzip ${args2} > ${prefix}.vcf.gz + + ${index_cmd} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | bgzip > ${prefix}.vcf.gz + ${ create_index ? "touch ${prefix}.vcf.gz.tbi" : "" } + """ +} diff --git a/modules/nf-core/tabix/extract/meta.yml b/modules/nf-core/tabix/extract/meta.yml new file mode 100644 index 000000000000..f466b831cfe2 --- /dev/null +++ b/modules/nf-core/tabix/extract/meta.yml @@ -0,0 +1,107 @@ +name: tabix_extract +description: Extract regions from a bgzipped VCF file using tabix +keywords: + - tabix + - vcf + - extract + - regions + - index +tools: + - tabix: + description: | + Tabix indexes a TAB-delimited genome position file in.tab.bgz and creates + an index file. It can also extract regions from a bgzipped VCF/BCF/BED/GFF file. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/tabix.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] + identifier: biotools:tabix + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - vcf: + type: file + description: bgzipped VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF + - tbi: + type: file + description: Tabix index for the VCF file + pattern: "*.vcf.gz.tbi" + ontologies: + - edam: http://edamontology.org/format_3326 # Index + - - meta2: + type: map + description: | + Groovy Map containing regions information + e.g. [ id:'regions' ] + - regions: + type: file + description: | + Optional file of regions to extract (one per line, BED or chr:start-end format). + Pass an empty list [] to extract all records. + pattern: "*.{bed,txt,tsv}" + ontologies: [] + - create_index: + type: boolean + description: | + Whether to create a tabix index (.tbi) for the output VCF. + Set to true to emit the tbi output channel. + +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}.vcf.gz: + type: file + description: bgzipped extracted VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - ${prefix}.vcf.gz.tbi: + type: file + description: Optional tabix index for the extracted VCF + pattern: "*.vcf.gz.tbi" + ontologies: + - edam: http://edamontology.org/format_3326 # Index + versions_tabix: + - - ${task.process}: + type: string + description: Name of the process + - tabix: + type: string + description: Name of the tool + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - tabix: + type: string + description: Name of the tool + - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@luisasantus" +maintainers: + - "@luisasantus" diff --git a/modules/nf-core/tabix/extract/tests/main.nf.test b/modules/nf-core/tabix/extract/tests/main.nf.test new file mode 100644 index 000000000000..3b2f36e03450 --- /dev/null +++ b/modules/nf-core/tabix/extract/tests/main.nf.test @@ -0,0 +1,86 @@ +nextflow_process { + + name "Test Process TABIX_EXTRACT" + script "../main.nf" + process "TABIX_EXTRACT" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/extract" + + config "./nextflow.config" + + test("sarscov2 - vcf - no regions - no index") { + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ [], [] ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.vcf).match("vcf_no_regions_no_index") }, + { assert process.out.tbi == [] } + ) + } + } + + test("sarscov2 - vcf - no regions - with index") { + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ [], [] ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.vcf).match("vcf_no_regions_with_index") }, + { assert process.out.tbi.size() == 1 } + ) + } + } + + test("sarscov2 - vcf - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ [], [] ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/tabix/extract/tests/nextflow.config b/modules/nf-core/tabix/extract/tests/nextflow.config new file mode 100644 index 000000000000..09c4f750dce6 --- /dev/null +++ b/modules/nf-core/tabix/extract/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: TABIX_EXTRACT { + ext.args = params.module_args ?: '' + ext.prefix = params.module_prefix ?: null + } +} diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 3593ea16ee94..c8bcef64eeef 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -8,31 +8,28 @@ process TABIX_TABIX { 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" input: - tuple val(meta), path(vcf), path(tbi) - tuple val(meta2), path(regions) + tuple val(meta), path(tab) output: - tuple val(meta), path("${prefix}.vcf"), emit: vcf + tuple val(meta), path("*.{tbi,csi}"), emit: index tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def regions_arg = regions ? "-R ${regions}" : "" + def args = task.ext.args ?: '' """ tabix \\ - ${regions_arg} \\ - ${args} \\ - ${vcf} \\ - > ${prefix}.vcf + --threads $task.cpus \\ + $args \\ + $tab """ stub: - prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" """ - touch ${prefix}.vcf + touch ${tab}.${index} """ } From f6c4dac1d6b508dc211aac761b5e7763f0b76e30 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 10:33:24 +0100 Subject: [PATCH 03/21] tabix/extract: make both vcf and tbi outputs optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always runs extraction + bgzip + tabix index. Both vcf and tbi outputs are optional: true — callers use whichever channels they need. --- modules/nf-core/tabix/extract/main.nf | 8 +++----- modules/nf-core/tabix/extract/meta.yml | 6 ------ .../nf-core/tabix/extract/tests/main.nf.test | 18 ++++++++---------- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/modules/nf-core/tabix/extract/main.nf b/modules/nf-core/tabix/extract/main.nf index 776f77dfc92a..6b09559db4d9 100644 --- a/modules/nf-core/tabix/extract/main.nf +++ b/modules/nf-core/tabix/extract/main.nf @@ -10,10 +10,9 @@ process TABIX_EXTRACT { input: tuple val(meta), path(vcf), path(tbi) tuple val(meta2), path(regions) - val create_index output: - tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf + tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'"), topic: versions, emit: versions_tabix @@ -25,7 +24,6 @@ process TABIX_EXTRACT { def args2 = task.ext.args2 ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions_arg = regions ? "-R ${regions}" : "" - def index_cmd = create_index ? "tabix ${prefix}.vcf.gz" : "" """ tabix \\ ${regions_arg} \\ @@ -33,13 +31,13 @@ process TABIX_EXTRACT { ${vcf} \\ | bgzip ${args2} > ${prefix}.vcf.gz - ${index_cmd} + tabix ${prefix}.vcf.gz """ stub: prefix = task.ext.prefix ?: "${meta.id}" """ echo "" | bgzip > ${prefix}.vcf.gz - ${ create_index ? "touch ${prefix}.vcf.gz.tbi" : "" } + touch ${prefix}.vcf.gz.tbi """ } diff --git a/modules/nf-core/tabix/extract/meta.yml b/modules/nf-core/tabix/extract/meta.yml index f466b831cfe2..eb0d917ecff5 100644 --- a/modules/nf-core/tabix/extract/meta.yml +++ b/modules/nf-core/tabix/extract/meta.yml @@ -47,12 +47,6 @@ input: Pass an empty list [] to extract all records. pattern: "*.{bed,txt,tsv}" ontologies: [] - - create_index: - type: boolean - description: | - Whether to create a tabix index (.tbi) for the output VCF. - Set to true to emit the tbi output channel. - output: vcf: - - meta: diff --git a/modules/nf-core/tabix/extract/tests/main.nf.test b/modules/nf-core/tabix/extract/tests/main.nf.test index 3b2f36e03450..b2c686f4cc32 100644 --- a/modules/nf-core/tabix/extract/tests/main.nf.test +++ b/modules/nf-core/tabix/extract/tests/main.nf.test @@ -11,7 +11,7 @@ nextflow_process { config "./nextflow.config" - test("sarscov2 - vcf - no regions - no index") { + test("sarscov2 - vcf - no regions") { when { process { """ @@ -21,7 +21,6 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [ [], [] ] - input[2] = false """ } } @@ -29,13 +28,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.vcf).match("vcf_no_regions_no_index") }, - { assert process.out.tbi == [] } + { assert snapshot(process.out).match() } ) } } - test("sarscov2 - vcf - no regions - with index") { + test("sarscov2 - vcf - with regions") { when { process { """ @@ -44,8 +42,10 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] - input[1] = [ [], [] ] - input[2] = true + input[1] = [ + [ id:'regions' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] """ } } @@ -53,8 +53,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.vcf).match("vcf_no_regions_with_index") }, - { assert process.out.tbi.size() == 1 } + { assert snapshot(process.out).match() } ) } } @@ -70,7 +69,6 @@ nextflow_process { file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [ [], [] ] - input[2] = true """ } } From 30ab6ea46bd5434b022693f0bd8686e3e0807f2c Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 10:39:46 +0100 Subject: [PATCH 04/21] tabix/tabix: add region extraction with optional vcf/tbi outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop tabix/extract — functionality merged into tabix/tabix - New input: optional regions file (pass [] to use indexing mode) - New input: index path (required for extraction mode) - index output: optional, emitted in indexing mode - vcf output: optional, emitted in extraction mode - tbi output: optional, emitted in extraction mode --- modules/nf-core/tabix/extract/environment.yml | 8 -- modules/nf-core/tabix/extract/main.nf | 43 -------- modules/nf-core/tabix/extract/meta.yml | 101 ------------------ .../nf-core/tabix/extract/tests/main.nf.test | 84 --------------- .../tabix/extract/tests/nextflow.config | 6 -- modules/nf-core/tabix/tabix/main.nf | 54 +++++++--- modules/nf-core/tabix/tabix/meta.yml | 51 ++++++++- 7 files changed, 89 insertions(+), 258 deletions(-) delete mode 100644 modules/nf-core/tabix/extract/environment.yml delete mode 100644 modules/nf-core/tabix/extract/main.nf delete mode 100644 modules/nf-core/tabix/extract/meta.yml delete mode 100644 modules/nf-core/tabix/extract/tests/main.nf.test delete mode 100644 modules/nf-core/tabix/extract/tests/nextflow.config diff --git a/modules/nf-core/tabix/extract/environment.yml b/modules/nf-core/tabix/extract/environment.yml deleted file mode 100644 index 771b13870781..000000000000 --- a/modules/nf-core/tabix/extract/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -channels: - - conda-forge - - bioconda - -dependencies: - - bioconda::htslib=1.21 diff --git a/modules/nf-core/tabix/extract/main.nf b/modules/nf-core/tabix/extract/main.nf deleted file mode 100644 index 6b09559db4d9..000000000000 --- a/modules/nf-core/tabix/extract/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process TABIX_EXTRACT { - tag "$meta.id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/92/92859404d861ae01afb87e2b789aebc71c0ab546397af890c7df74e4ee22c8dd/data' : - 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" - - input: - tuple val(meta), path(vcf), path(tbi) - tuple val(meta2), path(regions) - - output: - tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true - tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true - tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'"), topic: versions, emit: versions_tabix - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def regions_arg = regions ? "-R ${regions}" : "" - """ - tabix \\ - ${regions_arg} \\ - ${args} \\ - ${vcf} \\ - | bgzip ${args2} > ${prefix}.vcf.gz - - tabix ${prefix}.vcf.gz - """ - - stub: - prefix = task.ext.prefix ?: "${meta.id}" - """ - echo "" | bgzip > ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - """ -} diff --git a/modules/nf-core/tabix/extract/meta.yml b/modules/nf-core/tabix/extract/meta.yml deleted file mode 100644 index eb0d917ecff5..000000000000 --- a/modules/nf-core/tabix/extract/meta.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: tabix_extract -description: Extract regions from a bgzipped VCF file using tabix -keywords: - - tabix - - vcf - - extract - - regions - - index -tools: - - tabix: - description: | - Tabix indexes a TAB-delimited genome position file in.tab.bgz and creates - an index file. It can also extract regions from a bgzipped VCF/BCF/BED/GFF file. - homepage: http://www.htslib.org/ - documentation: http://www.htslib.org/doc/tabix.html - doi: 10.1093/bioinformatics/btq671 - licence: ["MIT"] - identifier: biotools:tabix - -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - vcf: - type: file - description: bgzipped VCF file - pattern: "*.vcf.gz" - ontologies: - - edam: http://edamontology.org/format_3016 # VCF - - tbi: - type: file - description: Tabix index for the VCF file - pattern: "*.vcf.gz.tbi" - ontologies: - - edam: http://edamontology.org/format_3326 # Index - - - meta2: - type: map - description: | - Groovy Map containing regions information - e.g. [ id:'regions' ] - - regions: - type: file - description: | - Optional file of regions to extract (one per line, BED or chr:start-end format). - Pass an empty list [] to extract all records. - pattern: "*.{bed,txt,tsv}" - ontologies: [] -output: - vcf: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - ${prefix}.vcf.gz: - type: file - description: bgzipped extracted VCF file - pattern: "*.vcf.gz" - ontologies: - - edam: http://edamontology.org/format_3016 # VCF - tbi: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - ${prefix}.vcf.gz.tbi: - type: file - description: Optional tabix index for the extracted VCF - pattern: "*.vcf.gz.tbi" - ontologies: - - edam: http://edamontology.org/format_3326 # Index - versions_tabix: - - - ${task.process}: - type: string - description: Name of the process - - tabix: - type: string - description: Name of the tool - - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': - type: eval - description: The expression to obtain the version of the tool - -topics: - versions: - - - ${task.process}: - type: string - description: Name of the process - - tabix: - type: string - description: Name of the tool - - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': - type: eval - description: The expression to obtain the version of the tool - -authors: - - "@luisasantus" -maintainers: - - "@luisasantus" diff --git a/modules/nf-core/tabix/extract/tests/main.nf.test b/modules/nf-core/tabix/extract/tests/main.nf.test deleted file mode 100644 index b2c686f4cc32..000000000000 --- a/modules/nf-core/tabix/extract/tests/main.nf.test +++ /dev/null @@ -1,84 +0,0 @@ -nextflow_process { - - name "Test Process TABIX_EXTRACT" - script "../main.nf" - process "TABIX_EXTRACT" - - tag "modules" - tag "modules_nfcore" - tag "tabix" - tag "tabix/extract" - - config "./nextflow.config" - - test("sarscov2 - vcf - no regions") { - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) - ] - input[1] = [ [], [] ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 - vcf - with regions") { - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) - ] - input[1] = [ - [ id:'regions' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - - test("sarscov2 - vcf - stub") { - options "-stub" - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) - ] - input[1] = [ [], [] ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } - -} diff --git a/modules/nf-core/tabix/extract/tests/nextflow.config b/modules/nf-core/tabix/extract/tests/nextflow.config deleted file mode 100644 index 09c4f750dce6..000000000000 --- a/modules/nf-core/tabix/extract/tests/nextflow.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - withName: TABIX_EXTRACT { - ext.args = params.module_args ?: '' - ext.prefix = params.module_prefix ?: null - } -} diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index c8bcef64eeef..eafdeebf7749 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -8,28 +8,54 @@ process TABIX_TABIX { 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" input: - tuple val(meta), path(tab) + tuple val(meta), path(tab), path(index) + tuple val(meta2), path(regions) output: - tuple val(meta), path("*.{tbi,csi}"), emit: index - tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix + tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true + tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true + tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true + tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'"), topic: versions, emit: versions_tabix when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - """ - tabix \\ - --threads $task.cpus \\ - $args \\ - $tab + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions_arg = regions ? "-R ${regions}" : "" + if (regions) { + """ + tabix \\ + ${regions_arg} \\ + ${args} \\ + ${tab} \\ + | bgzip ${args2} > ${prefix}.vcf.gz + + tabix ${prefix}.vcf.gz + """ + } else { + """ + tabix \\ + --threads $task.cpus \\ + $args \\ + $tab + """ + } - """ stub: def args = task.ext.args ?: '' - def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" - """ - touch ${tab}.${index} - """ + prefix = task.ext.prefix ?: "${meta.id}" + def idx = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" + if (regions) { + """ + echo "" | bgzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + """ + } else { + """ + touch ${tab}.${idx} + """ + } } diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index f5b6b3c1604d..ba725cddda4d 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -1,9 +1,13 @@ name: tabix_tabix -description: create tabix index from a sorted bgzip tab-delimited genome file +description: | + Create a tabix index from a sorted bgzip TAB-delimited genome file, or + extract regions from a bgzipped VCF file using an optional regions file. keywords: - index - tabix - vcf + - extract + - regions tools: - tabix: description: Generic indexer for TAB-delimited genome position files. @@ -23,6 +27,25 @@ input: description: TAB-delimited genome position file compressed with bgzip pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" ontologies: [] + - index: + type: file + description: | + Tabix index file for the input (required when using regions). + Pass [] when only creating an index. + pattern: "*.{tbi,csi}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing regions information + e.g. [ id:'regions' ] + - regions: + type: file + description: | + Optional file of regions to extract (BED or chr:start-end format). + Pass [] to create an index instead of extracting regions. + pattern: "*.{bed,txt,tsv}" + ontologies: [] output: index: - - meta: @@ -32,9 +55,33 @@ output: e.g. [ id:'test', single_end:false ] - "*.{tbi,csi}": type: file - description: Tabix index file (either tbi or csi) + description: Tabix index file (tbi or csi). Emitted when no regions file is provided. pattern: "*.{tbi,csi}" ontologies: [] + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.vcf.gz": + type: file + description: bgzipped VCF of extracted regions. Emitted when a regions file is provided. + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3016 # VCF + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.vcf.gz.tbi": + type: file + description: Tabix index for the extracted VCF. Emitted when a regions file is provided. + pattern: "*.vcf.gz.tbi" + ontologies: + - edam: http://edamontology.org/format_3326 # Index versions_tabix: - - ${task.process}: type: string From 24f25b524da59844ba82c6fcc01bd850e3a5b778 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 14:10:02 +0100 Subject: [PATCH 05/21] add tabix/tabix --- modules/nf-core/tabix/tabix/main.nf | 56 ++---- .../nf-core/tabix/tabix/tests/main.nf.test | 66 +++++++ .../tabix/tabix/tests/main.nf.test.snap | 184 ++++++++++++++++-- 3 files changed, 250 insertions(+), 56 deletions(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index eafdeebf7749..7b957fe45bce 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -8,54 +8,38 @@ process TABIX_TABIX { 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" input: - tuple val(meta), path(tab), path(index) + tuple val(meta), path(tab) tuple val(meta2), path(regions) output: - tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true - tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true - tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true - tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'"), topic: versions, emit: versions_tabix + tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true + tuple val(meta), path("${prefix}.vcf"), emit: vcf, optional: true + tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions_arg = regions ? "-R ${regions}" : "" - if (regions) { - """ - tabix \\ - ${regions_arg} \\ - ${args} \\ - ${tab} \\ - | bgzip ${args2} > ${prefix}.vcf.gz + def output_arg = regions ? "> ${prefix}.vcf" : "" - tabix ${prefix}.vcf.gz - """ - } else { - """ - tabix \\ - --threads $task.cpus \\ - $args \\ - $tab - """ - } + """ + tabix \\ + ${regions_arg} \\ + --threads $task.cpus \\ + $args \\ + $tab \\ + ${output_arg} + """ stub: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def idx = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" - if (regions) { - """ - echo "" | bgzip > ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - """ - } else { - """ - touch ${tab}.${idx} - """ - } + prefix = task.ext.prefix ?: "${meta.id}" + def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" + """ + touch ${tab}.${index} + touch ${prefix}.vcf + """ } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 19eefab87faf..f2a7bb3a0116 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -22,6 +22,7 @@ nextflow_process { [ id:'tbi_bed' ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ] ] + input[1] = [[], []] """ } } @@ -45,6 +46,7 @@ nextflow_process { [ id:'tbi_gff' ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ] ] + input[1] = [[], []] """ } } @@ -69,6 +71,7 @@ nextflow_process { [ id:'tbi_vcf' ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] ] + input[1] = [[], []] """ } } @@ -93,6 +96,7 @@ nextflow_process { [ id:'vcf_csi' ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] ] + input[1] = [[], []] """ } } @@ -106,6 +110,67 @@ nextflow_process { } + test("sarscov2_vcf_tbi_regions") { + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'vcf_regions' ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'vcf_regions' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2_vcf_tbi_regions_stub") { + options "-stub" + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'vcf_regions_stub' ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'vcf_regions_stub' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("sarscov2_vcf_csi_stub") { options "-stub" when { @@ -118,6 +183,7 @@ nextflow_process { [ id:'vcf_csi_stub' ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] ] + input[1] = [[], []] """ } } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 91a3a66de0e4..6b9a5ef83f9f 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -1,4 +1,61 @@ { + "sarscov2_vcf_tbi_regions_stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_regions_stub" + }, + "test.vcf.gz.tbi.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "vcf_regions_stub" + }, + "vcf_regions_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "TABIX_TABIX", + "tabix", + "1.21" + ] + ], + "index": [ + [ + { + "id": "vcf_regions_stub" + }, + "test.vcf.gz.tbi.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "vcf_regions_stub" + }, + "vcf_regions_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_tabix": [ + [ + "TABIX_TABIX", + "tabix", + "1.21" + ] + ] + } + ], + "timestamp": "2026-03-27T13:58:12.45707", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, "sarscov2_gff_tbi": { "content": [ { @@ -11,6 +68,9 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", @@ -25,6 +85,56 @@ "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" ] ], + "vcf": [ + + ], + "versions_tabix": [ + [ + "TABIX_TABIX", + "tabix", + "1.21" + ] + ] + } + ], + "timestamp": "2026-03-27T14:08:24.679407", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2_vcf_tbi_regions": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "vcf_regions" + }, + "vcf_regions.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "TABIX_TABIX", + "tabix", + "1.21" + ] + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "vcf_regions" + }, + "vcf_regions.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "versions_tabix": [ [ "TABIX_TABIX", @@ -34,11 +144,11 @@ ] } ], + "timestamp": "2026-03-27T13:58:08.630661", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:34.055936" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_bedgz_tbi": { "content": [ @@ -52,6 +162,9 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", @@ -65,6 +178,9 @@ }, "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" ] + ], + "vcf": [ + ], "versions_tabix": [ [ @@ -75,11 +191,11 @@ ] } ], + "timestamp": "2026-03-27T14:08:20.827824", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:29.90469" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_vcf_tbi": { "content": [ @@ -93,6 +209,9 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", @@ -106,6 +225,9 @@ }, "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" ] + ], + "vcf": [ + ], "versions_tabix": [ [ @@ -116,11 +238,11 @@ ] } ], + "timestamp": "2026-03-27T14:08:28.488344", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:38.044307" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_vcf_csi_stub": { "content": [ @@ -134,6 +256,14 @@ ] ], "1": [ + [ + { + "id": "vcf_csi_stub" + }, + "vcf_csi_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ "TABIX_TABIX", "tabix", @@ -148,6 +278,14 @@ "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "vcf": [ + [ + { + "id": "vcf_csi_stub" + }, + "vcf_csi_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "versions_tabix": [ [ "TABIX_TABIX", @@ -157,11 +295,11 @@ ] } ], + "timestamp": "2026-03-27T14:08:44.196453", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-10T14:31:29.90297082" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "sarscov2_vcf_csi": { "content": [ @@ -175,6 +313,9 @@ ] ], "1": [ + + ], + "2": [ [ "TABIX_TABIX", "tabix", @@ -188,6 +329,9 @@ }, "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" ] + ], + "vcf": [ + ], "versions_tabix": [ [ @@ -198,10 +342,10 @@ ] } ], + "timestamp": "2026-03-27T14:08:32.308038", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.04.7" - }, - "timestamp": "2025-11-20T13:47:42.013054" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file From 39c5bd6a95d99672b8ddba41f2a28311b52d7634 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 14:25:26 +0100 Subject: [PATCH 06/21] update tests --- modules/nf-core/tabix/tabix/meta.yml | 27 +++---------------- .../nf-core/tabix/tabix/tests/main.nf.test | 4 +-- .../tabix/tabix/tests/main.nf.test.snap | 14 +++++----- 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index ba725cddda4d..c37098b0a421 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -24,16 +24,9 @@ input: e.g. [ id:'test', single_end:false ] - tab: type: file - description: TAB-delimited genome position file compressed with bgzip + description: TAB-delimited genome position file compressed with bgzip, with its index staged alongside when using regions pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" ontologies: [] - - index: - type: file - description: | - Tabix index file for the input (required when using regions). - Pass [] when only creating an index. - pattern: "*.{tbi,csi}" - ontologies: [] - - meta2: type: map description: | @@ -64,24 +57,12 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "${prefix}.vcf.gz": + - "${prefix}.vcf": type: file - description: bgzipped VCF of extracted regions. Emitted when a regions file is provided. - pattern: "*.vcf.gz" + description: Plain VCF of extracted regions. Emitted when a regions file is provided. + pattern: "*.vcf" ontologies: - edam: http://edamontology.org/format_3016 # VCF - tbi: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "${prefix}.vcf.gz.tbi": - type: file - description: Tabix index for the extracted VCF. Emitted when a regions file is provided. - pattern: "*.vcf.gz.tbi" - ontologies: - - edam: http://edamontology.org/format_3326 # Index versions_tabix: - - ${task.process}: type: string diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index f2a7bb3a0116..77f01796203e 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -126,7 +126,7 @@ nextflow_process { ] input[1] = [ [ id:'vcf_regions' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] """ } @@ -157,7 +157,7 @@ nextflow_process { ] input[1] = [ [ id:'vcf_regions_stub' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] """ } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 6b9a5ef83f9f..3556d7f7f755 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -50,7 +50,7 @@ ] } ], - "timestamp": "2026-03-27T13:58:12.45707", + "timestamp": "2026-03-27T14:24:35.528676", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -97,7 +97,7 @@ ] } ], - "timestamp": "2026-03-27T14:08:24.679407", + "timestamp": "2026-03-27T14:24:18.982696", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -144,7 +144,7 @@ ] } ], - "timestamp": "2026-03-27T13:58:08.630661", + "timestamp": "2026-03-27T14:24:31.700556", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -191,7 +191,7 @@ ] } ], - "timestamp": "2026-03-27T14:08:20.827824", + "timestamp": "2026-03-27T14:24:15.186432", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -238,7 +238,7 @@ ] } ], - "timestamp": "2026-03-27T14:08:28.488344", + "timestamp": "2026-03-27T14:24:22.674157", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -295,7 +295,7 @@ ] } ], - "timestamp": "2026-03-27T14:08:44.196453", + "timestamp": "2026-03-27T14:24:39.331531", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -342,7 +342,7 @@ ] } ], - "timestamp": "2026-03-27T14:08:32.308038", + "timestamp": "2026-03-27T14:24:27.355489", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From b54b6029fa3701d3fe2fc78a94ad215df2f880ad Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 14:45:47 +0100 Subject: [PATCH 07/21] fix test --- modules/nf-core/tabix/tabix/main.nf | 8 +++-- .../tabix/tabix/tests/main.nf.test.snap | 32 ++++--------------- 2 files changed, 11 insertions(+), 29 deletions(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 7b957fe45bce..283633007d35 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -37,9 +37,11 @@ process TABIX_TABIX { stub: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def index = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" + def ext = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" + def index = regions ? "" : "touch ${tab}.${ext}" + def vcf = regions ? "touch ${prefix}.vcf" : "" """ - touch ${tab}.${index} - touch ${prefix}.vcf + ${index} + ${vcf} """ } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 3556d7f7f755..47aaeb46465e 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -3,12 +3,7 @@ "content": [ { "0": [ - [ - { - "id": "vcf_regions_stub" - }, - "test.vcf.gz.tbi.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "1": [ [ @@ -26,12 +21,7 @@ ] ], "index": [ - [ - { - "id": "vcf_regions_stub" - }, - "test.vcf.gz.tbi.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "vcf": [ [ @@ -50,7 +40,7 @@ ] } ], - "timestamp": "2026-03-27T14:24:35.528676", + "timestamp": "2026-03-27T14:36:37.322495", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -256,12 +246,7 @@ ] ], "1": [ - [ - { - "id": "vcf_csi_stub" - }, - "vcf_csi_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "2": [ [ @@ -279,12 +264,7 @@ ] ], "vcf": [ - [ - { - "id": "vcf_csi_stub" - }, - "vcf_csi_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" - ] + ], "versions_tabix": [ [ @@ -295,7 +275,7 @@ ] } ], - "timestamp": "2026-03-27T14:24:39.331531", + "timestamp": "2026-03-27T14:36:41.192364", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From d5d6fa1559e523e7cd2f0cd4069d1fa5fdfcb9de Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 14:53:13 +0100 Subject: [PATCH 08/21] fix downstream tests --- modules/nf-core/tabix/tabix/tests/main.nf.test | 2 +- subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf | 2 +- subworkflows/nf-core/vcf_extract_relate_somalier/main.nf | 3 ++- subworkflows/nf-core/vcf_gather_bcftools/main.nf | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 77f01796203e..6ebf62be52ec 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -135,7 +135,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["vcf"])).match() } ) } } diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf index f265df0fecf8..8c8eb40e909a 100644 --- a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf @@ -166,7 +166,7 @@ workflow VCF_ANNOTATE_ENSEMBLVEP_SNPEFF { return [meta, vcf, []] } - TABIX_TABIX(ch_tabix_input.bgzip) + TABIX_TABIX(ch_tabix_input.bgzip, [[],[]]) def ch_vcf_tbi = ch_tabix_input.bgzip .join(TABIX_TABIX.out.index, failOnDuplicate: true, failOnMismatch: true) diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf index 758b7a61a847..a196c7a20aef 100644 --- a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf @@ -22,7 +22,8 @@ workflow VCF_EXTRACT_RELATE_SOMALIER { } TABIX_TABIX( - ch_input.no_tbi + ch_input.no_tbi, + [[],[]] ) ch_somalierextract_input = ch_input.no_tbi diff --git a/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/subworkflows/nf-core/vcf_gather_bcftools/main.nf index 0fa54ae806a4..9f871293a250 100644 --- a/subworkflows/nf-core/vcf_gather_bcftools/main.nf +++ b/subworkflows/nf-core/vcf_gather_bcftools/main.nf @@ -54,7 +54,7 @@ workflow VCF_GATHER_BCFTOOLS { ch_tabix_input = ch_vcf_concat } - TABIX_TABIX(ch_tabix_input) + TABIX_TABIX(ch_tabix_input, [[],[]]) ch_vcf_index = ch_tabix_input .join(TABIX_TABIX.out.index) From 0be1e039765609118ea9bf2ea4e12576a814af65 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 15:00:26 +0100 Subject: [PATCH 09/21] update snapshot --- .../tabix/tabix/tests/main.nf.test.snap | 26 +++---------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 47aaeb46465e..3d20eb4f28f8 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -87,7 +87,7 @@ ] } ], - "timestamp": "2026-03-27T14:24:18.982696", + "timestamp": "2026-03-27T14:59:10.252663", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -96,24 +96,6 @@ "sarscov2_vcf_tbi_regions": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "vcf_regions" - }, - "vcf_regions.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "TABIX_TABIX", - "tabix", - "1.21" - ] - ], "index": [ ], @@ -122,7 +104,7 @@ { "id": "vcf_regions" }, - "vcf_regions.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "vcf_regions.vcf" ] ], "versions_tabix": [ @@ -134,7 +116,7 @@ ] } ], - "timestamp": "2026-03-27T14:24:31.700556", + "timestamp": "2026-03-27T14:59:22.524536", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -181,7 +163,7 @@ ] } ], - "timestamp": "2026-03-27T14:24:15.186432", + "timestamp": "2026-03-27T14:59:06.309394", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From 44dd0197133216edacb26e2c766eb7a8101f00d9 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 17:21:24 +0100 Subject: [PATCH 10/21] add tai input for the regions --- modules/nf-core/tabix/tabix/main.nf | 2 +- modules/nf-core/tabix/tabix/meta.yml | 9 ++++++- .../nf-core/tabix/tabix/tests/main.nf.test | 27 ++++++++++--------- .../vcf_annotate_ensemblvep_snpeff/main.nf | 2 +- .../vcf_extract_relate_somalier/main.nf | 2 +- .../nf-core/vcf_gather_bcftools/main.nf | 2 +- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 283633007d35..0fd3e43dd5ca 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -8,7 +8,7 @@ process TABIX_TABIX { 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" input: - tuple val(meta), path(tab) + tuple val(meta), path(tab), path(tai) tuple val(meta2), path(regions) output: diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index c37098b0a421..f53dcd1cabc2 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -24,9 +24,16 @@ input: e.g. [ id:'test', single_end:false ] - tab: type: file - description: TAB-delimited genome position file compressed with bgzip, with its index staged alongside when using regions + description: TAB-delimited genome position file compressed with bgzip pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" ontologies: [] + - tai: + type: file + description: | + Tabix index for the input file. Required when extracting regions. + Pass [] when creating an index instead. + pattern: "*.{tbi,csi}" + ontologies: [] - - meta2: type: map description: | diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 6ebf62be52ec..eb5f9e23b478 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -20,7 +20,8 @@ nextflow_process { """ input[0] = [ [ id:'tbi_bed' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true), + [] ] input[1] = [[], []] """ @@ -44,7 +45,8 @@ nextflow_process { """ input[0] = [ [ id:'tbi_gff' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + [] ] input[1] = [[], []] """ @@ -69,7 +71,8 @@ nextflow_process { """ input[0] = [ [ id:'tbi_vcf' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] input[1] = [[], []] """ @@ -94,7 +97,8 @@ nextflow_process { """ input[0] = [ [ id:'vcf_csi' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] input[1] = [[], []] """ @@ -119,10 +123,8 @@ nextflow_process { """ input[0] = [ [ id:'vcf_regions' ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) - ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [ [ id:'vcf_regions' ], @@ -150,10 +152,8 @@ nextflow_process { """ input[0] = [ [ id:'vcf_regions_stub' ], - [ - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) - ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ] input[1] = [ [ id:'vcf_regions_stub' ], @@ -181,7 +181,8 @@ nextflow_process { """ input[0] = [ [ id:'vcf_csi_stub' ], - [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ] + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] input[1] = [[], []] """ diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf index 8c8eb40e909a..863326095db9 100644 --- a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf @@ -166,7 +166,7 @@ workflow VCF_ANNOTATE_ENSEMBLVEP_SNPEFF { return [meta, vcf, []] } - TABIX_TABIX(ch_tabix_input.bgzip, [[],[]]) + TABIX_TABIX(ch_tabix_input.bgzip.map { meta, vcf -> [meta, vcf, []] }, [[],[]]) def ch_vcf_tbi = ch_tabix_input.bgzip .join(TABIX_TABIX.out.index, failOnDuplicate: true, failOnMismatch: true) diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf index a196c7a20aef..1576503239c7 100644 --- a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf @@ -22,7 +22,7 @@ workflow VCF_EXTRACT_RELATE_SOMALIER { } TABIX_TABIX( - ch_input.no_tbi, + ch_input.no_tbi.map { meta, vcf -> [meta, vcf, []] }, [[],[]] ) diff --git a/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/subworkflows/nf-core/vcf_gather_bcftools/main.nf index 9f871293a250..7afa43492079 100644 --- a/subworkflows/nf-core/vcf_gather_bcftools/main.nf +++ b/subworkflows/nf-core/vcf_gather_bcftools/main.nf @@ -54,7 +54,7 @@ workflow VCF_GATHER_BCFTOOLS { ch_tabix_input = ch_vcf_concat } - TABIX_TABIX(ch_tabix_input, [[],[]]) + TABIX_TABIX(ch_tabix_input.map { meta, vcf -> [meta, vcf, []] }, [[],[]]) ch_vcf_index = ch_tabix_input .join(TABIX_TABIX.out.index) From 067bffc09bd3dd61aef664de3a907a57c691283e Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Fri, 27 Mar 2026 18:05:54 +0100 Subject: [PATCH 11/21] emit bgzipped --- modules/nf-core/tabix/tabix/main.nf | 8 +++--- modules/nf-core/tabix/tabix/meta.yml | 6 ++--- .../nf-core/tabix/tabix/tests/main.nf.test | 2 +- .../tabix/tabix/tests/main.nf.test.snap | 26 +++---------------- 4 files changed, 12 insertions(+), 30 deletions(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 0fd3e43dd5ca..133ac2a5841d 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -12,8 +12,8 @@ process TABIX_TABIX { tuple val(meta2), path(regions) output: - tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true - tuple val(meta), path("${prefix}.vcf"), emit: vcf, optional: true + tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true + tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix when: @@ -23,7 +23,7 @@ process TABIX_TABIX { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions_arg = regions ? "-R ${regions}" : "" - def output_arg = regions ? "> ${prefix}.vcf" : "" + def output_arg = regions ? "| bgzip --threads ${task.cpus} > ${prefix}.vcf.gz" : "" """ tabix \\ @@ -39,7 +39,7 @@ process TABIX_TABIX { prefix = task.ext.prefix ?: "${meta.id}" def ext = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" def index = regions ? "" : "touch ${tab}.${ext}" - def vcf = regions ? "touch ${prefix}.vcf" : "" + def vcf = regions ? "touch ${prefix}.vcf.gz" : "" """ ${index} ${vcf} diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index f53dcd1cabc2..ea67a57c931a 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -64,10 +64,10 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "${prefix}.vcf": + - "${prefix}.vcf.gz": type: file - description: Plain VCF of extracted regions. Emitted when a regions file is provided. - pattern: "*.vcf" + description: bgzipped VCF of extracted regions. Emitted when a regions file is provided. + pattern: "*.vcf.gz" ontologies: - edam: http://edamontology.org/format_3016 # VCF versions_tabix: diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index eb5f9e23b478..54fb8d87140b 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -166,7 +166,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["vcf"])).match() } ) } } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 3d20eb4f28f8..08afa437e50b 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -2,24 +2,6 @@ "sarscov2_vcf_tbi_regions_stub": { "content": [ { - "0": [ - - ], - "1": [ - [ - { - "id": "vcf_regions_stub" - }, - "vcf_regions_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - [ - "TABIX_TABIX", - "tabix", - "1.21" - ] - ], "index": [ ], @@ -28,7 +10,7 @@ { "id": "vcf_regions_stub" }, - "vcf_regions_stub.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "vcf_regions_stub.vcf.gz" ] ], "versions_tabix": [ @@ -40,7 +22,7 @@ ] } ], - "timestamp": "2026-03-27T14:36:37.322495", + "timestamp": "2026-03-27T17:51:54.703596", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -104,7 +86,7 @@ { "id": "vcf_regions" }, - "vcf_regions.vcf" + "vcf_regions.vcf.gz" ] ], "versions_tabix": [ @@ -116,7 +98,7 @@ ] } ], - "timestamp": "2026-03-27T14:59:22.524536", + "timestamp": "2026-03-27T17:43:18.320775", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From 116bfcab61fc53277e41c37437c273369ab31952 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Mon, 30 Mar 2026 09:50:41 +0200 Subject: [PATCH 12/21] add regions to the same input channel --- modules/nf-core/tabix/tabix/main.nf | 3 +-- modules/nf-core/tabix/tabix/meta.yml | 5 ----- .../nf-core/tabix/tabix/tests/main.nf.test | 20 +++++++------------ .../vcf_annotate_ensemblvep_snpeff/main.nf | 2 +- .../vcf_extract_relate_somalier/main.nf | 3 +-- .../nf-core/vcf_gather_bcftools/main.nf | 2 +- 6 files changed, 11 insertions(+), 24 deletions(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 133ac2a5841d..c3bd4d855e91 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -8,8 +8,7 @@ process TABIX_TABIX { 'community.wave.seqera.io/library/htslib:1.21--ff8e28a189fbecaa' }" input: - tuple val(meta), path(tab), path(tai) - tuple val(meta2), path(regions) + tuple val(meta), path(tab), path(tai), path(regions) output: tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index ea67a57c931a..841103fcc767 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -34,11 +34,6 @@ input: Pass [] when creating an index instead. pattern: "*.{tbi,csi}" ontologies: [] - - - meta2: - type: map - description: | - Groovy Map containing regions information - e.g. [ id:'regions' ] - regions: type: file description: | diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 54fb8d87140b..b35e6cbcc286 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -21,9 +21,9 @@ nextflow_process { input[0] = [ [ id:'tbi_bed' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true), + [], [] ] - input[1] = [[], []] """ } } @@ -46,9 +46,9 @@ nextflow_process { input[0] = [ [ id:'tbi_gff' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + [], [] ] - input[1] = [[], []] """ } } @@ -72,9 +72,9 @@ nextflow_process { input[0] = [ [ id:'tbi_vcf' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], [] ] - input[1] = [[], []] """ } } @@ -98,9 +98,9 @@ nextflow_process { input[0] = [ [ id:'vcf_csi' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], [] ] - input[1] = [[], []] """ } } @@ -124,10 +124,7 @@ nextflow_process { input[0] = [ [ id:'vcf_regions' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) - ] - input[1] = [ - [ id:'vcf_regions' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] """ @@ -153,10 +150,7 @@ nextflow_process { input[0] = [ [ id:'vcf_regions_stub' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) - ] - input[1] = [ - [ id:'vcf_regions_stub' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ] """ @@ -182,9 +176,9 @@ nextflow_process { input[0] = [ [ id:'vcf_csi_stub' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], [] ] - input[1] = [[], []] """ } } diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf index 863326095db9..3ee3fb26c75d 100644 --- a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf @@ -166,7 +166,7 @@ workflow VCF_ANNOTATE_ENSEMBLVEP_SNPEFF { return [meta, vcf, []] } - TABIX_TABIX(ch_tabix_input.bgzip.map { meta, vcf -> [meta, vcf, []] }, [[],[]]) + TABIX_TABIX(ch_tabix_input.bgzip.map { meta, vcf -> [meta, vcf, [], []] }) def ch_vcf_tbi = ch_tabix_input.bgzip .join(TABIX_TABIX.out.index, failOnDuplicate: true, failOnMismatch: true) diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf index 1576503239c7..4b4f593c8034 100644 --- a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf @@ -22,8 +22,7 @@ workflow VCF_EXTRACT_RELATE_SOMALIER { } TABIX_TABIX( - ch_input.no_tbi.map { meta, vcf -> [meta, vcf, []] }, - [[],[]] + ch_input.no_tbi.map { meta, vcf -> [meta, vcf, [], []] } ) ch_somalierextract_input = ch_input.no_tbi diff --git a/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/subworkflows/nf-core/vcf_gather_bcftools/main.nf index 7afa43492079..0d0df166b65e 100644 --- a/subworkflows/nf-core/vcf_gather_bcftools/main.nf +++ b/subworkflows/nf-core/vcf_gather_bcftools/main.nf @@ -54,7 +54,7 @@ workflow VCF_GATHER_BCFTOOLS { ch_tabix_input = ch_vcf_concat } - TABIX_TABIX(ch_tabix_input.map { meta, vcf -> [meta, vcf, []] }, [[],[]]) + TABIX_TABIX(ch_tabix_input.map { meta, vcf -> [meta, vcf, [], []] }) ch_vcf_index = ch_tabix_input .join(TABIX_TABIX.out.index) From 958601e4f40a3778fd2eedc34bf7329a360237fa Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 10:49:41 +0200 Subject: [PATCH 13/21] Update modules/nf-core/tabix/tabix/meta.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> --- modules/nf-core/tabix/tabix/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index 841103fcc767..bbf38874c63c 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -27,7 +27,7 @@ input: description: TAB-delimited genome position file compressed with bgzip pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" ontologies: [] - - tai: + - index: type: file description: | Tabix index for the input file. Required when extracting regions. From b238a9e761ba6242bcc01fec9104e537cb9add8e Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 10:49:50 +0200 Subject: [PATCH 14/21] Update modules/nf-core/tabix/tabix/main.nf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> --- modules/nf-core/tabix/tabix/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index c3bd4d855e91..e8ab351e3f98 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -23,7 +23,6 @@ process TABIX_TABIX { prefix = task.ext.prefix ?: "${meta.id}" def regions_arg = regions ? "-R ${regions}" : "" def output_arg = regions ? "| bgzip --threads ${task.cpus} > ${prefix}.vcf.gz" : "" - """ tabix \\ ${regions_arg} \\ From c8f6234d783dd167f6f317461538fc7d783f0429 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 10:50:01 +0200 Subject: [PATCH 15/21] Update modules/nf-core/tabix/tabix/meta.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> --- modules/nf-core/tabix/tabix/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index bbf38874c63c..6607f1b8ff29 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -61,7 +61,7 @@ output: e.g. [ id:'test', single_end:false ] - "${prefix}.vcf.gz": type: file - description: bgzipped VCF of extracted regions. Emitted when a regions file is provided. + description: Bgzipped VCF of extracted regions. Emitted when a regions file is provided. pattern: "*.vcf.gz" ontologies: - edam: http://edamontology.org/format_3016 # VCF From e689684d485f4fe4ce9183661d4c4701d351ac95 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 10:50:30 +0200 Subject: [PATCH 16/21] Update modules/nf-core/tabix/tabix/main.nf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> --- modules/nf-core/tabix/tabix/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index e8ab351e3f98..1af81d3ca6a8 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -37,7 +37,7 @@ process TABIX_TABIX { prefix = task.ext.prefix ?: "${meta.id}" def ext = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" def index = regions ? "" : "touch ${tab}.${ext}" - def vcf = regions ? "touch ${prefix}.vcf.gz" : "" + def vcf = regions ? "echo | gzip > ${prefix}.vcf.gz" : "" """ ${index} ${vcf} From 349c8175b7d879c2c851ce5894e34b708acaed07 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 10:50:56 +0200 Subject: [PATCH 17/21] Update modules/nf-core/tabix/tabix/tests/main.nf.test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Famke Bäuerle <45968370+famosab@users.noreply.github.com> --- modules/nf-core/tabix/tabix/tests/main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index b35e6cbcc286..2710289ef298 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -160,7 +160,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["vcf"])).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } } From 56846d4772b99c94e75516b8f206a84e5f0f64c2 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 11:19:10 +0200 Subject: [PATCH 18/21] add ntf-vcf --- modules/nf-core/tabix/tabix/meta.yml | 19 +++++---- .../nf-core/tabix/tabix/tests/main.nf.test | 8 +++- .../tabix/tabix/tests/main.nf.test.snap | 41 ++++++++----------- 3 files changed, 33 insertions(+), 35 deletions(-) diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index 6607f1b8ff29..8226b6e96879 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -14,7 +14,8 @@ tools: homepage: https://www.htslib.org/doc/tabix.html documentation: https://www.htslib.org/doc/tabix.1.html doi: 10.1093/bioinformatics/btq671 - licence: ["MIT"] + licence: + - "MIT" identifier: biotools:tabix input: - - meta: @@ -27,7 +28,7 @@ input: description: TAB-delimited genome position file compressed with bgzip pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" ontologies: [] - - index: + - tai: type: file description: | Tabix index for the input file. Required when extracting regions. @@ -40,7 +41,8 @@ input: Optional file of regions to extract (BED or chr:start-end format). Pass [] to create an index instead of extracting regions. pattern: "*.{bed,txt,tsv}" - ontologies: [] + ontologies: + - edam: http://edamontology.org/format_3475 output: index: - - meta: @@ -50,7 +52,8 @@ output: e.g. [ id:'test', single_end:false ] - "*.{tbi,csi}": type: file - description: Tabix index file (tbi or csi). Emitted when no regions file is provided. + description: Tabix index file (tbi or csi). Emitted when no regions file + is provided. pattern: "*.{tbi,csi}" ontologies: [] vcf: @@ -61,10 +64,12 @@ output: e.g. [ id:'test', single_end:false ] - "${prefix}.vcf.gz": type: file - description: Bgzipped VCF of extracted regions. Emitted when a regions file is provided. + description: Bgzipped VCF of extracted regions. Emitted when a regions + file is provided. pattern: "*.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3016 # VCF + - edam: http://edamontology.org/format_3016 + - edam: http://edamontology.org/format_3989 versions_tabix: - - ${task.process}: type: string @@ -75,7 +80,6 @@ output: - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': type: eval description: The expression to obtain the version of the tool - topics: versions: - - ${task.process}: @@ -87,7 +91,6 @@ topics: - tabix -h 2>&1 | grep -oP 'Version:\s*\K[^\s]+': type: eval description: The expression to obtain the version of the tool - authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 2710289ef298..8b00e63ef227 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -117,7 +117,7 @@ nextflow_process { test("sarscov2_vcf_tbi_regions") { when { params { - module_args = '' + module_args = '-h' } process { """ @@ -134,7 +134,11 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(sanitizeOutput(process.out, unstableKeys: ["vcf"])).match() } + { assert snapshot( + process.out.index, + path(process.out.vcf[0][1]).vcf.getVariantsMD5(), + process.out.versions_tabix + ).match() } ) } } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 08afa437e50b..036e1c04cfb1 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -10,7 +10,7 @@ { "id": "vcf_regions_stub" }, - "vcf_regions_stub.vcf.gz" + "vcf_regions_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions_tabix": [ @@ -22,7 +22,7 @@ ] } ], - "timestamp": "2026-03-27T17:51:54.703596", + "timestamp": "2026-03-31T11:09:55.221403", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -69,7 +69,7 @@ ] } ], - "timestamp": "2026-03-27T14:59:10.252663", + "timestamp": "2026-03-31T11:09:38.656426", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -77,28 +77,19 @@ }, "sarscov2_vcf_tbi_regions": { "content": [ - { - "index": [ - - ], - "vcf": [ - [ - { - "id": "vcf_regions" - }, - "vcf_regions.vcf.gz" - ] - ], - "versions_tabix": [ - [ - "TABIX_TABIX", - "tabix", - "1.21" - ] + [ + + ], + "d41d8cd98f00b204e9800998ecf8427e", + [ + [ + "TABIX_TABIX", + "tabix", + "1.21" ] - } + ] ], - "timestamp": "2026-03-27T17:43:18.320775", + "timestamp": "2026-03-31T11:15:49.071688", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -145,7 +136,7 @@ ] } ], - "timestamp": "2026-03-27T14:59:06.309394", + "timestamp": "2026-03-31T11:09:34.72853", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -192,7 +183,7 @@ ] } ], - "timestamp": "2026-03-27T14:24:22.674157", + "timestamp": "2026-03-31T11:09:42.627078", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From 288fee06e64c81080298f173c180246d6bdb6a0f Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 13:06:14 +0200 Subject: [PATCH 19/21] add regions --- modules/nf-core/tabix/tabix/tests/main.nf.test | 12 ++++++++++-- .../nf-core/tabix/tabix/tests/main.nf.test.snap | 16 ++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 8b00e63ef227..1d268a3d719f 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -115,6 +115,10 @@ nextflow_process { } test("sarscov2_vcf_tbi_regions") { + setup { + def regions = file("${workDir}/test.regions.txt") + regions.text = "MT192765.1\t196\t198\nMT192765.1\t4787\t4789\n" + } when { params { module_args = '-h' @@ -125,7 +129,7 @@ nextflow_process { [ id:'vcf_regions' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + file("${workDir}/test.regions.txt", checkIfExists: true) ] """ } @@ -145,6 +149,10 @@ nextflow_process { test("sarscov2_vcf_tbi_regions_stub") { options "-stub" + setup { + def regions = file("${workDir}/test.regions.txt") + regions.text = "MT192765.1\t196\t198\nMT192765.1\t4787\t4789\n" + } when { params { module_args = '' @@ -155,7 +163,7 @@ nextflow_process { [ id:'vcf_regions_stub' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + file("${workDir}/test.regions.txt", checkIfExists: true) ] """ } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 036e1c04cfb1..a819bf30dbe8 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -22,7 +22,7 @@ ] } ], - "timestamp": "2026-03-31T11:09:55.221403", + "timestamp": "2026-03-31T12:50:00.816329", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -69,7 +69,7 @@ ] } ], - "timestamp": "2026-03-31T11:09:38.656426", + "timestamp": "2026-03-31T12:49:41.921478", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -80,7 +80,7 @@ [ ], - "d41d8cd98f00b204e9800998ecf8427e", + "c57539b0a39bee90f9dc6f4c60b268ed", [ [ "TABIX_TABIX", @@ -89,7 +89,7 @@ ] ] ], - "timestamp": "2026-03-31T11:15:49.071688", + "timestamp": "2026-03-31T12:52:35.688937", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -136,7 +136,7 @@ ] } ], - "timestamp": "2026-03-31T11:09:34.72853", + "timestamp": "2026-03-31T12:49:36.968706", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -183,7 +183,7 @@ ] } ], - "timestamp": "2026-03-31T11:09:42.627078", + "timestamp": "2026-03-31T12:49:46.679693", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -230,7 +230,7 @@ ] } ], - "timestamp": "2026-03-27T14:36:41.192364", + "timestamp": "2026-03-31T12:50:04.730679", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -277,7 +277,7 @@ ] } ], - "timestamp": "2026-03-27T14:24:27.355489", + "timestamp": "2026-03-31T12:49:50.339466", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From 54a15c239fe4fb029f9b6225650525548ae4b60e Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Tue, 31 Mar 2026 15:19:16 +0200 Subject: [PATCH 20/21] add new test --- modules/nf-core/tabix/tabix/tests/main.nf.test | 12 ++---------- .../nf-core/tabix/tabix/tests/main.nf.test.snap | 14 +++++++------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 1d268a3d719f..05c8279280ea 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -115,10 +115,6 @@ nextflow_process { } test("sarscov2_vcf_tbi_regions") { - setup { - def regions = file("${workDir}/test.regions.txt") - regions.text = "MT192765.1\t196\t198\nMT192765.1\t4787\t4789\n" - } when { params { module_args = '-h' @@ -129,7 +125,7 @@ nextflow_process { [ id:'vcf_regions' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), - file("${workDir}/test.regions.txt", checkIfExists: true) + file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) ] """ } @@ -149,10 +145,6 @@ nextflow_process { test("sarscov2_vcf_tbi_regions_stub") { options "-stub" - setup { - def regions = file("${workDir}/test.regions.txt") - regions.text = "MT192765.1\t196\t198\nMT192765.1\t4787\t4789\n" - } when { params { module_args = '' @@ -163,7 +155,7 @@ nextflow_process { [ id:'vcf_regions_stub' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), - file("${workDir}/test.regions.txt", checkIfExists: true) + file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) ] """ } diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index a819bf30dbe8..344ced64ab08 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -22,7 +22,7 @@ ] } ], - "timestamp": "2026-03-31T12:50:00.816329", + "timestamp": "2026-03-31T14:27:49.271599", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -69,7 +69,7 @@ ] } ], - "timestamp": "2026-03-31T12:49:41.921478", + "timestamp": "2026-03-31T14:27:32.1232", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -89,7 +89,7 @@ ] ] ], - "timestamp": "2026-03-31T12:52:35.688937", + "timestamp": "2026-03-31T14:38:35.804865", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -136,7 +136,7 @@ ] } ], - "timestamp": "2026-03-31T12:49:36.968706", + "timestamp": "2026-03-31T14:27:28.328609", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -183,7 +183,7 @@ ] } ], - "timestamp": "2026-03-31T12:49:46.679693", + "timestamp": "2026-03-31T14:27:35.800413", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -230,7 +230,7 @@ ] } ], - "timestamp": "2026-03-31T12:50:04.730679", + "timestamp": "2026-03-31T14:27:53.111352", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -277,7 +277,7 @@ ] } ], - "timestamp": "2026-03-31T12:49:50.339466", + "timestamp": "2026-03-31T14:27:39.647126", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From 603d2b24d581a6bdccc23a93b1dab7e49d75e298 Mon Sep 17 00:00:00 2001 From: Luisa Santus Date: Wed, 1 Apr 2026 15:22:12 +0200 Subject: [PATCH 21/21] add flexible tab suffix detection --- modules/nf-core/tabix/tabix/main.nf | 14 +++--- modules/nf-core/tabix/tabix/meta.yml | 14 +++--- .../nf-core/tabix/tabix/tests/main.nf.test | 2 +- .../tabix/tabix/tests/main.nf.test.snap | 50 +++++++++---------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 1af81d3ca6a8..36e64c318f1a 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -11,8 +11,8 @@ process TABIX_TABIX { tuple val(meta), path(tab), path(tai), path(regions) output: - tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true - tuple val(meta), path("${prefix}.vcf.gz"), emit: vcf, optional: true + tuple val(meta), path("*.{tbi,csi}"), emit: index, optional: true + tuple val(meta), path("${prefix}.*gz"), emit: extracted, optional: true tuple val("${task.process}"), val('tabix'), eval("tabix -h 2>&1 | grep -oP 'Version:\\s*\\K[^\\s]+'") , topic: versions , emit: versions_tabix when: @@ -21,8 +21,9 @@ process TABIX_TABIX { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def tab_suffix = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : '' def regions_arg = regions ? "-R ${regions}" : "" - def output_arg = regions ? "| bgzip --threads ${task.cpus} > ${prefix}.vcf.gz" : "" + def output_arg = regions ? "| bgzip --threads ${task.cpus} > ${prefix}${tab_suffix}" : "" """ tabix \\ ${regions_arg} \\ @@ -35,11 +36,12 @@ process TABIX_TABIX { stub: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def tab_suffix = tab.name.indexOf('.') >= 0 ? tab.name.substring(tab.name.indexOf('.')) : '' def ext = args.contains("-C ") || args.contains("--csi") ? "csi" : "tbi" - def index = regions ? "" : "touch ${tab}.${ext}" - def vcf = regions ? "echo | gzip > ${prefix}.vcf.gz" : "" + def index = regions ? "" : "touch ${tab}.${ext}" + def extracted = regions ? "echo | gzip > ${prefix}${tab_suffix}" : "" """ ${index} - ${vcf} + ${extracted} """ } diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index 8226b6e96879..5c32b22a26bc 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -56,20 +56,18 @@ output: is provided. pattern: "*.{tbi,csi}" ontologies: [] - vcf: + extracted: - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "${prefix}.vcf.gz": + - "${prefix}.*gz": type: file - description: Bgzipped VCF of extracted regions. Emitted when a regions - file is provided. - pattern: "*.vcf.gz" - ontologies: - - edam: http://edamontology.org/format_3016 - - edam: http://edamontology.org/format_3989 + description: Bgzipped file of extracted regions, preserving the input file + extension. Emitted when a regions file is provided. + pattern: "*.*gz" + ontologies: [] versions_tabix: - - ${task.process}: type: string diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test index 05c8279280ea..280eac25959d 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -136,7 +136,7 @@ nextflow_process { { assert process.success }, { assert snapshot( process.out.index, - path(process.out.vcf[0][1]).vcf.getVariantsMD5(), + path(process.out.extracted[0][1]).vcf.getVariantsMD5(), process.out.versions_tabix ).match() } ) diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap index 344ced64ab08..27e977e0f81e 100644 --- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -2,16 +2,16 @@ "sarscov2_vcf_tbi_regions_stub": { "content": [ { - "index": [ - - ], - "vcf": [ + "extracted": [ [ { "id": "vcf_regions_stub" }, "vcf_regions_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] + ], + "index": [ + ], "versions_tabix": [ [ @@ -22,7 +22,7 @@ ] } ], - "timestamp": "2026-03-31T14:27:49.271599", + "timestamp": "2026-04-01T15:11:16.940666", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -48,6 +48,9 @@ "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -56,9 +59,6 @@ }, "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926" ] - ], - "vcf": [ - ], "versions_tabix": [ [ @@ -69,7 +69,7 @@ ] } ], - "timestamp": "2026-03-31T14:27:32.1232", + "timestamp": "2026-04-01T15:10:57.509986", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -115,6 +115,9 @@ "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -123,9 +126,6 @@ }, "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d" ] - ], - "vcf": [ - ], "versions_tabix": [ [ @@ -136,7 +136,7 @@ ] } ], - "timestamp": "2026-03-31T14:27:28.328609", + "timestamp": "2026-04-01T15:10:52.971222", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -162,6 +162,9 @@ "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -170,9 +173,6 @@ }, "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" ] - ], - "vcf": [ - ], "versions_tabix": [ [ @@ -183,7 +183,7 @@ ] } ], - "timestamp": "2026-03-31T14:27:35.800413", + "timestamp": "2026-04-01T15:11:01.818669", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -209,6 +209,9 @@ "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -217,9 +220,6 @@ }, "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] - ], - "vcf": [ - ], "versions_tabix": [ [ @@ -230,7 +230,7 @@ ] } ], - "timestamp": "2026-03-31T14:27:53.111352", + "timestamp": "2026-04-01T15:11:22.403222", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -256,6 +256,9 @@ "tabix", "1.21" ] + ], + "extracted": [ + ], "index": [ [ @@ -264,9 +267,6 @@ }, "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b" ] - ], - "vcf": [ - ], "versions_tabix": [ [ @@ -277,7 +277,7 @@ ] } ], - "timestamp": "2026-03-31T14:27:39.647126", + "timestamp": "2026-04-01T15:11:05.735059", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4"