Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions modules/nf-core/bigslice/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,28 @@ process BIGSLICE {
: 'biocontainers/bigslice:2.0.2--pyh8ed023e_0'}"

input:
tuple val(meta), path(bgc, stageAs: 'bgc_files/*')
path hmmdb
tuple val(meta), path(bgc, stageAs: 'bgc_files/s*/*')
path(hmmdb)
val(export_tsv)
Comment on lines +11 to +13
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we put all these inputs into one tuple? That would ensure that everything always comes together in the right combination.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmdb is a shared reference database (not sample-specific) and export_tsv is a boolean flag (not a file), so neither belongs in the sample tuple. In nf-core, tuples group a meta map with the data files of that specific sample; mixing in shared resources or behaviour flags would break this convention.


output:
tuple val(meta), path("${prefix}/result/data.db"), emit: db
tuple val(meta), path("${prefix}/result/tmp/**/*.fa"), emit: fa
tuple val(meta), path("${prefix}/result") , emit: output
tuple val(meta), path("${prefix}/result/tsv_export") , emit: tsv, optional: true
// WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
tuple val("${task.process}"), val('bigslice'), val("2.0.2"), topic: versions, emit: versions_bigslice

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def sample = meta.id
def export_tsv_cmd = export_tsv ? "bigslice --export-tsv ${prefix}/result/tsv_export --program_db_folder ${hmmdb} ${args2} ${prefix}" : ''
"""
mkdir -p input/dataset/${sample} input/taxonomy
cp bgc_files/* input/dataset/${sample}/
find bgc_files -name '*.gbk' | xargs -I{} cp {} input/dataset/${sample}/

printf "# dataset_name\\tdataset_path\\ttaxonomy_path\\tdescription\\n" > input/datasets.tsv
printf "dataset\\tdataset\\ttaxonomy/taxonomy.tsv\\tBGC dataset\\n" >> input/datasets.tsv
Expand All @@ -39,16 +42,22 @@ process BIGSLICE {
-i input \\
--program_db_folder ${hmmdb} \\
${prefix}

${export_tsv_cmd}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add this tsv then as optional output and have it be accessible for downstream analyses?

"""

stub:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
echo ${args}

mkdir -p ${prefix}/result/tmp/2e555308dfc411186cf012334262f127
touch ${prefix}/result/data.db
touch ${prefix}/result/tmp/2e555308dfc411186cf012334262f127/test.fa
if ${export_tsv}; then
mkdir -p ${prefix}/result/tsv_export
touch ${prefix}/result/tsv_export/bgcs.tsv
fi
"""
}
63 changes: 33 additions & 30 deletions modules/nf-core/bigslice/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,59 +8,62 @@ keywords:
- analysis
tools:
- "bigslice":
description: A highly scalable, user-interactive tool for the large scale analysis
of Biosynthetic Gene Clusters data
description: A highly scalable, user-interactive tool for the large scale
analysis of Biosynthetic Gene Clusters data
homepage: "https://github.com/medema-group/bigslice"
documentation: "https://github.com/medema-group/bigslice"
tool_dev_url: "https://github.com/medema-group/bigslice"
doi: "10.1093/gigascience/giaa154"
licence: ["AGPL v3-or-later"]
licence:
- "AGPL v3-or-later"
identifier: ""

input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. `[ id:'sample1' ]`
- bgc:
type: directory
type: file
description: |
Path to a folder containing genomic regions in GenBank format, structured for BiG-SLiCE.
Each genome should have its own subfolder with region `.gbk` files.
The folder should also contain a datasets.tsv, and a taxonomy folder, with TSV taxonomy files per dataset.
See the tool's wiki for more information: https://github.com/medema-group/bigslice/wiki/Input-folder
pattern: "*"
List of GenBank (.gbk) files containing genomic region annotations for BiG-SLiCE input.
Each file represents a BGC region. The module internally organises them into the required
BiG-SLiCE input folder structure (datasets.tsv and taxonomy TSV).
pattern: "*.gbk"
ontologies: []
- hmmdb:
type: directory
description: |
Path to the BiG-SLiCE HMM database folder containing biosynthetic and sub Pfams for annotation, in the required BiG-SLiCE format.
An example directory in compressed archive format can be found here: https://github.com/medema-group/bigslice/releases/download/v2.0.0rc/bigslice-models.2022-11-30.tar.gz

- export_tsv:
type: boolean
description: |
If true, runs a second BiG-SLiCE invocation to export all results from the SQLite database
to TSV files under `tsv_export/`. Additional arguments for this step can be passed via `task.ext.args2`.
output:
db:
output:
- - meta:
type: map
description: Groovy Map containing sample/dataset information
- ${prefix}/result/data.db:
type: file
- ${prefix}/result:
type: directory
description: |
The results SQLite database. Contains various tables relevant to result
BGCs, CDSs, GCFs, HMMs and HSPs.
pattern: "data.db"
ontologies:
- edam: "http://edamontology.org/format_3621" # SQLite format
fa:
BiG-SLiCE result directory containing the SQLite database (`data.db`),
predicted feature FASTA files (`tmp/**/*.fa`), and optionally TSV exports
(`tsv_export/`) when `export_tsv` is `true`.
pattern: "result"
tsv:
- - meta:
type: map
description: Groovy Map containing sample/dataset information
- ${prefix}/result/tmp/**/*.fa:
type: file
- ${prefix}/result/tsv_export:
type: directory
description: |
Predicted features as FASTA files. One file per hit HMM.
pattern: "*.fa"
ontologies:
- edam: "http://edamontology.org/format_1929" # FASTA
Directory containing TSV exports of all parsed BGC metadata, vectorized
features and clustering results. Only present when `export_tsv` input is
set to `true`.
pattern: "tsv_export"
versions_bigslice:
- - ${task.process}:
type: string
Expand All @@ -70,8 +73,7 @@ output:
description: The name of the tool
- 2.0.2:
type: string
description: The expression to obtain the version of the tool

description: The version of the tool
topics:
versions:
- - ${task.process}:
Expand All @@ -82,9 +84,10 @@ topics:
description: The name of the tool
- 2.0.2:
type: string
description: The expression to obtain the version of the tool

description: The version of the tool
authors:
- "@vagkaratzas"
- "@SkyLex"
maintainers:
- "@vagkaratzas"
- "@SkyLex"
65 changes: 63 additions & 2 deletions modules/nf-core/bigslice/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -67,16 +67,76 @@ nextflow_process {
[ meta, gbk_files ]
}
input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] }
input[2] = false
"""
}
}

then {
assert process.success
def resultDir = file(process.out.output[0][1])
def allNames = []
def tmpFaCount = 0
resultDir.eachFileRecurse { f ->
if (!f.isDirectory()) {
def rel = resultDir.toPath().relativize(f.toPath()).toString()
if (rel.startsWith('tmp/') || rel.startsWith('tmp\\')) {
if (f.name.endsWith('.fa')) tmpFaCount++
} else {
allNames.add(f.name)
}
}
}
assertAll(
{ assert resultDir.isDirectory() },
{ assert tmpFaCount > 0 },
{ assert snapshot(
allNames.sort(),
process.out.findAll { key, val -> key.startsWith("versions")}
).match() }
Comment on lines +91 to +96
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add other files to this snapshot as well? Ideally we want all outputs to be at least present by name in the snapshot.

)
}

}

test("streptomyces_coelicolor - bigslice - gbk - export_tsv") {

when {
process {
"""
// Flatten the GBK directory into a list of individual GBK files with meta
input[0] = UNTAR_GBK.out.untar.map { meta, dir ->
def gbk_files = []
dir.eachFileRecurse { if (it.name.endsWith('.gbk')) gbk_files << it }
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ meta, gbk_files ]
}
input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] }
input[2] = true
"""
}
}

then {
assert process.success
def resultDir = file(process.out.output[0][1])
def allNames = []
def tmpFaCount = 0
resultDir.eachFileRecurse { f ->
if (!f.isDirectory()) {
def rel = resultDir.toPath().relativize(f.toPath()).toString()
if (rel.startsWith('tmp/') || rel.startsWith('tmp\\')) {
if (f.name.endsWith('.fa')) tmpFaCount++
} else {
allNames.add(f.name)
}
}
}
assertAll(
{ assert resultDir.isDirectory() },
{ assert tmpFaCount > 0 },
{ assert file(process.out.tsv[0][1]).isDirectory() },
{ assert snapshot(
file(process.out.db[0][1]).name,
process.out.fa[0][1].size(),
allNames.sort(),
process.out.findAll { key, val -> key.startsWith("versions")}
).match() }
)
Expand All @@ -98,6 +158,7 @@ nextflow_process {
[ meta, gbk_files ]
}
input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] }
input[2] = false
"""
}
}
Expand Down
73 changes: 53 additions & 20 deletions modules/nf-core/bigslice/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,30 @@
{
"streptomyces_coelicolor - bigslice - gbk - export_tsv": {
"content": [
[
"bgc_features_1.pkl",
"bgc_metadata.tsv",
"data.db",
"gcf_membership.tsv",
"gcf_models_1.pkl",
"run_metadata.tsv"
],
{
"versions_bigslice": [
[
"BIGSLICE",
"bigslice",
"2.0.2"
]
]
}
],
"meta": {
"nf-test": "0.9.3",
"nextflow": "25.10.3"
},
"timestamp": "2026-04-03T01:11:26.257005672"
},
"streptomyces_coelicolor - bigslice - gbk - stub": {
"content": [
{
Expand All @@ -7,16 +33,18 @@
{
"id": "test_gbk"
},
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e"
[
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e",
[
[
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
]
]
],
"1": [
[
{
"id": "test_gbk"
},
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]

],
"2": [
[
Expand All @@ -25,21 +53,23 @@
"2.0.2"
]
],
"db": [
"output": [
[
{
"id": "test_gbk"
},
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e"
[
"data.db:md5,d41d8cd98f00b204e9800998ecf8427e",
[
[
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
]
]
]
],
"fa": [
[
{
"id": "test_gbk"
},
"test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
]
"tsv": [

],
"versions_bigslice": [
[
Expand All @@ -63,12 +93,15 @@
"nf-test": "0.9.3",
"nextflow": "25.10.3"
},
"timestamp": "2026-03-04T09:47:43.387153103"
"timestamp": "2026-04-02T22:45:23.737040708"
},
"streptomyces_coelicolor - bigslice - gbk": {
"content": [
"data.db",
40,
[
"bgc_features_1.pkl",
"data.db",
"gcf_models_1.pkl"
],
{
"versions_bigslice": [
[
Expand All @@ -83,6 +116,6 @@
"nf-test": "0.9.3",
"nextflow": "25.10.3"
},
"timestamp": "2026-03-04T09:47:30.918713387"
"timestamp": "2026-04-03T01:10:19.794409662"
}
}
Loading