Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions modules/nf-core/bigslice/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,28 @@ process BIGSLICE {
: 'biocontainers/bigslice:2.0.2--pyh8ed023e_0'}"

input:
tuple val(meta), path(bgc, stageAs: 'bgc_files/*')
path hmmdb
tuple val(meta), path(bgc, stageAs: 'bgc_files/s*/*')
path(hmmdb)
val(export_tsv)
Comment on lines +11 to +13
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we put all these inputs into one tuple? That would ensure that everything always comes together in the right combination.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmdb is a shared reference database (not sample-specific) and export_tsv is a boolean flag (not a file), so neither belongs in the sample tuple. In nf-core, tuples group a meta map with the data files of that specific sample; mixing in shared resources or behaviour flags would break this convention.


output:
tuple val(meta), path("${prefix}/result/data.db"), emit: db
tuple val(meta), path("${prefix}/result/tmp/**/*.fa"), emit: fa
tuple val(meta), path("${prefix}/result") , emit: output
tuple val(meta), path("${prefix}/result/tsv_export") , emit: tsv, optional: true
// WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
tuple val("${task.process}"), val('bigslice'), val("2.0.2"), topic: versions, emit: versions_bigslice

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def sample = meta.id
def export_tsv_cmd = export_tsv ? "bigslice --export-tsv ${prefix}/result/tsv_export --program_db_folder ${hmmdb} ${args2} ${prefix}" : ''
"""
mkdir -p input/dataset/${sample} input/taxonomy
cp bgc_files/* input/dataset/${sample}/
find bgc_files -name '*.gbk' | xargs -I{} cp {} input/dataset/${sample}/

printf "# dataset_name\\tdataset_path\\ttaxonomy_path\\tdescription\\n" > input/datasets.tsv
printf "dataset\\tdataset\\ttaxonomy/taxonomy.tsv\\tBGC dataset\\n" >> input/datasets.tsv
Expand All @@ -39,16 +42,22 @@ process BIGSLICE {
-i input \\
--program_db_folder ${hmmdb} \\
${prefix}

${export_tsv_cmd}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add this tsv then as optional output and have it be accessible for downstream analyses?

"""

stub:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
echo ${args}

mkdir -p ${prefix}/result/tmp/2e555308dfc411186cf012334262f127
touch ${prefix}/result/data.db
touch ${prefix}/result/tmp/2e555308dfc411186cf012334262f127/test.fa
if ${export_tsv}; then
mkdir -p ${prefix}/result/tsv_export
touch ${prefix}/result/tsv_export/bgcs.tsv
fi
"""
}
63 changes: 33 additions & 30 deletions modules/nf-core/bigslice/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,59 +8,62 @@ keywords:
- analysis
tools:
- "bigslice":
description: A highly scalable, user-interactive tool for the large scale analysis
of Biosynthetic Gene Clusters data
description: A highly scalable, user-interactive tool for the large scale
analysis of Biosynthetic Gene Clusters data
homepage: "https://github.com/medema-group/bigslice"
documentation: "https://github.com/medema-group/bigslice"
tool_dev_url: "https://github.com/medema-group/bigslice"
doi: "10.1093/gigascience/giaa154"
licence: ["AGPL v3-or-later"]
licence:
- "AGPL v3-or-later"
identifier: ""

input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1' ]`
- bgc:
type: directory
type: file
description: |
Path to a folder containing genomic regions in GenBank format, structured for BiG-SLiCE.
Each genome should have its own subfolder with region `.gbk` files.
The folder should also contain a datasets.tsv, and a taxonomy folder, with TSV taxonomy files per dataset.
See the tool's wiki for more information: https://github.com/medema-group/bigslice/wiki/Input-folder
pattern: "*"
List of GenBank (.gbk) files containing genomic region annotations for BiG-SLiCE input.
Each file represents a BGC region. The module internally organises them into the required
BiG-SLiCE input folder structure (datasets.tsv and taxonomy TSV).
pattern: "*.gbk"
ontologies: []
- hmmdb:
type: directory
description: |
Path to the BiG-SLiCE HMM database folder containing biosynthetic and sub Pfams for annotation, in the required BiG-SLiCE format.
An example directory in compressed archive format can be found here: https://github.com/medema-group/bigslice/releases/download/v2.0.0rc/bigslice-models.2022-11-30.tar.gz

- export_tsv:
type: boolean
description: |
If true, runs a second BiG-SLiCE invocation to export all results from the SQLite database
to TSV files under `tsv_export/`. Additional arguments for this step can be passed via `task.ext.args2`.
output:
db:
output:
- - meta:
type: map
description: Groovy Map containing sample/dataset information
- ${prefix}/result/data.db:
type: file
- ${prefix}/result:
type: directory
description: |
The results SQLite database. Contains various tables relevant to result
BGCs, CDSs, GCFs, HMMs and HSPs.
pattern: "data.db"
ontologies:
- edam: "http://edamontology.org/format_3621" # SQLite format
fa:
BiG-SLiCE result directory containing the SQLite database (`data.db`),
predicted feature FASTA files (`tmp/**/*.fa`), and optionally TSV exports
(`tsv_export/`) when `export_tsv` is `true`.
pattern: "result"
tsv:
- - meta:
type: map
description: Groovy Map containing sample/dataset information
- ${prefix}/result/tmp/**/*.fa:
type: file
- ${prefix}/result/tsv_export:
type: directory
description: |
Predicted features as FASTA files. One file per hit HMM.
pattern: "*.fa"
ontologies:
- edam: "http://edamontology.org/format_1929" # FASTA
Directory containing TSV exports of all parsed BGC metadata, vectorized
features and clustering results. Only present when `export_tsv` input is
set to `true`.
pattern: "tsv_export"
versions_bigslice:
- - ${task.process}:
type: string
Expand All @@ -70,8 +73,7 @@ output:
description: The name of the tool
- 2.0.2:
type: string
description: The expression to obtain the version of the tool

description: The version of the tool
topics:
versions:
- - ${task.process}:
Expand All @@ -82,9 +84,10 @@ topics:
description: The name of the tool
- 2.0.2:
type: string
description: The expression to obtain the version of the tool

description: The version of the tool
authors:
- "@vagkaratzas"
- "@SkyLex"
maintainers:
- "@vagkaratzas"
- "@SkyLex"
65 changes: 63 additions & 2 deletions modules/nf-core/bigslice/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,76 @@ nextflow_process {
[ meta, gbk_files ]
}
input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] }
input[2] = false
"""
}
}

then {
assert process.success
def resultDir = file(process.out.output[0][1])
def allNames = []
def tmpFaCount = 0
resultDir.eachFileRecurse { f ->
if (!f.isDirectory()) {
def rel = resultDir.toPath().relativize(f.toPath()).toString()
if (rel.startsWith('tmp/') || rel.startsWith('tmp\\')) {
if (f.name.endsWith('.fa')) tmpFaCount++
} else {
allNames.add(f.name)
}
}
}
assertAll(
{ assert resultDir.isDirectory() },
{ assert tmpFaCount > 0 },
{ assert snapshot(
allNames.sort(),
process.out.findAll { key, val -> key.startsWith("versions")}
).match() }
Comment on lines +91 to +96
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add other files to this snapshot as well? Ideally we want all outputs to be at least present by name in the snapshot.

)
}

}

test("streptomyces_coelicolor - bigslice - gbk - export_tsv") {

when {
process {
"""
// Flatten the GBK directory into a list of individual GBK files with meta
input[0] = UNTAR_GBK.out.untar.map { meta, dir ->
def gbk_files = []
dir.eachFileRecurse { if (it.name.endsWith('.gbk')) gbk_files << it }
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ meta, gbk_files ]
}
input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] }
input[2] = true
"""
}
}

then {
assert process.success
def resultDir = file(process.out.output[0][1])
def allNames = []
def tmpFaCount = 0
resultDir.eachFileRecurse { f ->
if (!f.isDirectory()) {
def rel = resultDir.toPath().relativize(f.toPath()).toString()
if (rel.startsWith('tmp/') || rel.startsWith('tmp\\')) {
if (f.name.endsWith('.fa')) tmpFaCount++
} else {
allNames.add(f.name)
}
}
}
assertAll(
{ assert resultDir.isDirectory() },
{ assert tmpFaCount > 0 },
{ assert file(process.out.tsv[0][1]).isDirectory() },
{ assert snapshot(
file(process.out.db[0][1]).name,
process.out.fa[0][1].size(),
allNames.sort(),
process.out.findAll { key, val -> key.startsWith("versions")}
).match() }
)
Expand All @@ -98,6 +158,7 @@ nextflow_process {
[ meta, gbk_files ]
}
input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] }
input[2] = false
"""
}
}
Expand Down
73 changes: 53 additions & 20 deletions modules/nf-core/bigslice/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,30 @@
{
"streptomyces_coelicolor - bigslice - gbk - export_tsv": {
"content": [
[
"bgc_features_1.pkl",
"bgc_metadata.tsv",
"data.db",
"gcf_membership.tsv",
"gcf_models_1.pkl",
"run_metadata.tsv"
],
{
"versions_bigslice": [
[
"BIGSLICE",
"bigslice",
"2.0.2"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.3"
},
"timestamp": "2026-04-03T01:11:26.257005672"
},
"streptomyces_coelicolor - bigslice - gbk - stub": {
"content": [
{
Expand All @@ -7,16 +33,18 @@
{
"id": "test_gbk"
},
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e"
[
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e",
[
[
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
]
]
],
"1": [
[
{
"id": "test_gbk"
},
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]

],
"2": [
[
Expand All @@ -25,21 +53,23 @@
"2.0.2"
]
],
"db": [
"output": [
[
{
"id": "test_gbk"
},
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e"
[
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e",
[
[
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
]
]
],
"fa": [
[
{
"id": "test_gbk"
},
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
"tsv": [

],
"versions_bigslice": [
[
Expand All @@ -63,12 +93,15 @@
"nf-test": "0.9.3",
"nextflow": "25.10.3"
},
"timestamp": "2026-03-04T09:47:43.387153103"
"timestamp": "2026-04-02T22:45:23.737040708"
},
"streptomyces_coelicolor - bigslice - gbk": {
"content": [
"data.db",
40,
[
"bgc_features_1.pkl",
"data.db",
"gcf_models_1.pkl"
],
{
"versions_bigslice": [
[
Expand All @@ -83,6 +116,6 @@
"nf-test": "0.9.3",
"nextflow": "25.10.3"
},
"timestamp": "2026-03-04T09:47:30.918713387"
"timestamp": "2026-04-03T01:10:19.794409662"
}
}
Loading