krakenuniq/build: always stage inputs into the database directory and clean up afterwards

- main.nf: copy library, taxonomy and the seqid2taxid map into ${prefix}
  unconditionally, remove those copies after krakenuniq-build, and (unless
  keep_intermediate is set) delete every non-database file plus any directory
  left empty. Both find calls are scoped to ${prefix} so the cleanup never
  touches anything else in the task work directory.
- tests: pass the downloaded taxonomy as a list of files, assert that no staged
  input file remains in the output, and snapshot taxDB by name only.
- snapshot: regenerated accordingly.

diff --git a/modules/nf-core/krakenuniq/build/main.nf b/modules/nf-core/krakenuniq/build/main.nf
index 7fe9120d1da7..6f1c8665904a 100644
--- a/modules/nf-core/krakenuniq/build/main.nf
+++ b/modules/nf-core/krakenuniq/build/main.nf
@@ -21,23 +21,30 @@ process KRAKENUNIQ_BUILD {
     script:
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
-    custom_db = custom_library_dir ? "mkdir ${prefix} && mv library taxonomy ${custom_seqid2taxid} ${prefix}" : ""
-    run_cleanup = keep_intermediate ? "" : "find -L ${prefix} -type f -not -name \"*.kdb\" -type f -not -name \"*idx\" -not -name \"taxDB\" -not -name \"*.counts\" -delete"
+    def run_cleanup = keep_intermediate ? "" : """
+    find -L ${prefix} -type f \\
+        -not -name "*.kdb" \\
+        -not -name "*idx" \\
+        -not -name "taxDB" \\
+        -not -name "*.counts" \\
+        -delete && \\
+    find ${prefix} -type d -empty -delete
+    """
 
     """
-    ${custom_db}
+    mkdir ${prefix} && cp -r library taxonomy ${custom_seqid2taxid} ${prefix}
 
     krakenuniq-build \\
         ${args} \\
         --threads ${task.cpus} \\
         --db ${prefix}
 
+    rm -r ${prefix}/{library,taxonomy,${custom_seqid2taxid}}
     ${run_cleanup}
     """
 
     stub:
     prefix = task.ext.prefix ?: "${meta.id}"
-    run_cleanup = keep_intermediate ? "" : "find -L ${prefix} -type f -not -name \"*.kdb\" -type f -not -name \"*idx\" -not -name \"taxDB\" -delete"
     """
     mkdir ${prefix}/
     touch ${prefix}/database-build.log
diff --git a/modules/nf-core/krakenuniq/build/tests/main.nf.test b/modules/nf-core/krakenuniq/build/tests/main.nf.test
index da55baf00ac4..45451c30d725 100644
--- a/modules/nf-core/krakenuniq/build/tests/main.nf.test
+++ b/modules/nf-core/krakenuniq/build/tests/main.nf.test
@@ -12,12 +12,12 @@ nextflow_process {
     tag "krakenuniq/download"
 
     setup {
-        run("KRAKENUNIQ_DOWNLOAD"){
+        run("KRAKENUNIQ_DOWNLOAD") {
             script "../../download/main.nf"
             process {
-                    """
-                    input[0] = 'taxonomy'
-                    """
+                """
+                input[0] = 'taxonomy'
+                """
             }
         }
     }
@@ -31,13 +31,9 @@ nextflow_process {
                 ch_seqid2mapid = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/seqid2taxid.map', checkIfExists: true)
 
                 ch_input = ch_fastas
-                                .combine(KRAKENUNIQ_DOWNLOAD.out.output)
-                                .combine(ch_seqid2mapid)
-                                .map {
-                                    fna, tax, map ->
-
-                                    [ [id:'test'] , fna, tax, map ]
-                                }
+                    .combine(KRAKENUNIQ_DOWNLOAD.out.output.map{ dir -> [ dir.listFiles().toList() ] })
+                    .combine(ch_seqid2mapid)
+                    .map { fna, tax, map -> [ [id:'test'], fna, tax, map ] }
 
                 input[0] = ch_input
                 input[1] = false
@@ -46,24 +42,34 @@ nextflow_process {
         }
 
         then {
-            def stablefiles = []
-            file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (!file.isDirectory() && !["database-build.log", "database.report.tsv", "timestamp", "taxdump.tar.gz", "names.dmp", "nodes.dmp"].find {file.toString().endsWith(it)}) {stablefiles.add(file)} }
-            def unstablefiles = []
-            file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (["database-build.log", "database.report.tsv", "timestamp", "taxdump.tar.gz", "names.dmp", "nodes.dmp"].find {file.toString().endsWith(it)}) {unstablefiles.add(file.getName().toString())} }
+            def inputs = ["timestamp", "taxdump.tar.gz", "names.dmp", "nodes.dmp"]
+            def unstableOutputs = [
+                "database-build.log", // Every line has a timestamp
+                "database.report.tsv", // Timestamp and test filepath on first line
+                "taxDB" // Present species and species labels change: Probably because of KRAKENUNIQ_DOWNLOAD which fetches from NCBI.
+            ]
+
+            def dbDir = file(process.out.db.get(0).get(1))
+            def allFiles = []
+            dbDir.eachFileRecurse { f -> if (!f.isDirectory()) allFiles << f }
+
+            def stableFiles = allFiles.findAll { f -> !unstableOutputs.any { f.name.endsWith(it) } }
+            def unstableFiles = allFiles.findAll { f -> unstableOutputs.any { f.name.endsWith(it) } }
+                .collect { it.name }
+
             assertAll(
                 { assert process.success },
+                { assert !allFiles.any { it.name in inputs } },
                 { assert snapshot(
-                        stablefiles.sort(),
-                        unstablefiles.sort(),
-                        process.out.findAll { key, val -> key.startsWith('versions') }
-                    ).match()
-                }
+                    stableFiles.sort(),
+                    unstableFiles.sort(),
+                    process.out.findAll { key, val -> key.startsWith('versions') }
+                ).match() }
             )
         }
-
     }
 
-test("sarscov2 - fasta - nocleanup") {
+    test("sarscov2 - fasta - nocleanup") {
 
         when {
             process {
@@ -72,13 +78,9 @@ test("sarscov2 - fasta - nocleanup") {
                 ch_seqid2mapid = Channel.fromPath(params.modules_testdata_base_path + 'genomics/sarscov2/metagenome/seqid2taxid.map', checkIfExists: true)
 
                 ch_input = ch_fastas
-                                .combine(KRAKENUNIQ_DOWNLOAD.out.output)
-                                .combine(ch_seqid2mapid)
-                                .map {
-                                    fna, tax, map ->
-
-                                    [ [id:'test'] , fna, tax, map ]
-                                }
+                    .combine(KRAKENUNIQ_DOWNLOAD.out.output.map{ dir -> [ dir.listFiles().toList() ] })
+                    .combine(ch_seqid2mapid)
+                    .map { fna, tax, map -> [ [id:'test'], fna, tax, map ] }
 
                 input[0] = ch_input
                 input[1] = true
@@ -87,21 +89,31 @@ test("sarscov2 - fasta - nocleanup") {
         }
 
         then {
-            def stablefiles = []
-            file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (!file.isDirectory() && !["database-build.log", "database.report.tsv", "timestamp", "taxdump.tar.gz", "names.dmp", "nodes.dmp"].find {file.toString().endsWith(it)}) {stablefiles.add(file)} }
-            def unstablefiles = []
-            file(process.out.db.get(0).get(1)).eachFileRecurse{ file -> if (["database-build.log", "database.report.tsv", "timestamp", "taxdump.tar.gz", "names.dmp", "nodes.dmp"].find {file.toString().endsWith(it)}) {unstablefiles.add(file.getName().toString())} }
+            def inputs = ["timestamp", "taxdump.tar.gz", "names.dmp", "nodes.dmp"]
+            def unstableOutputs = [
+                "database-build.log", // Every line has a timestamp
+                "database.report.tsv", // Timestamp and test filepath on first line
+                "taxDB" // Present species and species labels change: Probably because of KRAKENUNIQ_DOWNLOAD which fetches from NCBI.
+            ]
+
+            def dbDir = file(process.out.db.get(0).get(1))
+            def allFiles = []
+            dbDir.eachFileRecurse { f -> if (!f.isDirectory()) allFiles << f }
+
+            def stableFiles = allFiles.findAll { f -> !unstableOutputs.any { f.name.endsWith(it) } }
+            def unstableFiles = allFiles.findAll { f -> unstableOutputs.any { f.name.endsWith(it) } }
+                .collect { it.name }
+
             assertAll(
                 { assert process.success },
+                { assert !allFiles.any { it.name in inputs } },
                 { assert snapshot(
-                        stablefiles.sort(),
-                        unstablefiles.sort(),
-                        process.out.findAll { key, val -> key.startsWith('versions') }
-                    ).match()
-                }
+                    stableFiles.sort(),
+                    unstableFiles.sort(),
+                    process.out.findAll { key, val -> key.startsWith('versions') }
+                ).match() }
             )
         }
-
     }
 
     test("sarscov2 - fasta - stub") {
@@ -110,7 +122,7 @@ test("sarscov2 - fasta - nocleanup") {
         when {
             process {
                 """
-                input[0] = [[id:'test'], [] , [] , []]
+                input[0] = [[id:'test'], [], [], []]
                 input[1] = false
                 """
             }
@@ -120,7 +132,7 @@ test("sarscov2 - fasta - nocleanup") {
             assertAll(
                 { assert process.success },
                 { assert snapshot(process.out).match() }
-                )
+            )
         }
     }
 }
diff --git a/modules/nf-core/krakenuniq/build/tests/main.nf.test.snap b/modules/nf-core/krakenuniq/build/tests/main.nf.test.snap
index cf902f8a687b..9310d953cb03 100644
--- a/modules/nf-core/krakenuniq/build/tests/main.nf.test.snap
+++ b/modules/nf-core/krakenuniq/build/tests/main.nf.test.snap
@@ -65,30 +65,12 @@
                 "database.kdb.counts:md5,a82149480a435210ec1e870f06b6cdb3",
                 "database.kraken.tsv:md5,be9bd0cb3a8bea30989e65aa42a73727",
                 "database0.kdb:md5,46542be19979e867075f9e455a314e00",
-                "library-files.txt:md5,8a9817d2de8465eb436aa3aa8696a717",
-                "genome.fasta:md5,6e9fe4042a72f2345f644f239272b7e6",
-                "seqid2taxid.map:md5,fe738f2333d57dde0db32239c04a93b8",
-                "taxDB:md5,2a6c6234f8f4edba2b94ec75ea9f1b4c",
-                "citations.dmp:md5,7b2e5d0fbea3da110c4a0e3bb7c3720d",
-                "division.dmp:md5,f93fc4c838fdaa4b8923f003e8c11712",
-                "gc.prt:md5,accfbf13bca0899e7ba9849195af241a",
-                "gencode.dmp:md5,a5f39fc7a6608775d4c62a96cf1d170c",
-                "images.dmp:md5,3855c67cfa06ef8ae713cab73f009e3d",
-                "merged.dmp:md5,5e78eb1c4a235889458ef9f83a64e81e",
-                "readme.txt:md5,051060a90b7b89ad8e0fa33fe280211c"
+                "library-files.txt:md5,8a9817d2de8465eb436aa3aa8696a717"
             ],
             [
-                "database-build.log",
                 "database-build.log",
                 "database.report.tsv",
-                "delnodes.dmp",
-                "names.dmp",
-                "names.dmp",
-                "nodes.dmp",
-                "nodes.dmp",
-                "taxdump.tar.gz",
-                "taxdump.tar.gz",
-                "timestamp"
+                "taxDB"
             ],
             {
                 "versions_krakenuniq": [
@@ -100,10 +82,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-04-25T13:34:38.850675715",
+        "timestamp": "2026-05-13T14:59:07.323282",
         "meta": {
             "nf-test": "0.9.5",
-            "nextflow": "25.10.4"
+            "nextflow": "26.04.0"
         }
     },
     "sarscov2 - fasta": {
@@ -112,11 +94,10 @@
                 "database.idx:md5,f5298aa4215c956d2f737b5988721a24",
                 "database.kdb:md5,46542be19979e867075f9e455a314e00",
                 "database.kdb.counts:md5,a82149480a435210ec1e870f06b6cdb3",
-                "database0.kdb:md5,46542be19979e867075f9e455a314e00",
-                "taxDB:md5,2a6c6234f8f4edba2b94ec75ea9f1b4c"
+                "database0.kdb:md5,46542be19979e867075f9e455a314e00"
             ],
             [
-
+                "taxDB"
             ],
             {
                 "versions_krakenuniq": [
@@ -128,10 +109,10 @@
                 ]
             }
         ],
-        "timestamp": "2026-04-25T13:33:43.689590026",
+        "timestamp": "2026-05-13T14:57:23.476447",
         "meta": {
             "nf-test": "0.9.5",
-            "nextflow": "25.10.4"
+            "nextflow": "26.04.0"
         }
     }
 }
\ No newline at end of file