From f949409c5c2b930f6a89070c775d305022ed7126 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 10:40:49 +0300
Subject: [PATCH 01/28] Add scdblfinder module skeleton generated by nf-core
 tools

---
 modules/local/scdblfinder/environment.yml    | 10 +++
 modules/local/scdblfinder/main.nf            | 83 ++++++++++++++++++++
 modules/local/scdblfinder/meta.yml           | 77 ++++++++++++++++++
 modules/local/scdblfinder/tests/main.nf.test | 78 ++++++++++++++++++
 4 files changed, 248 insertions(+)
 create mode 100644 modules/local/scdblfinder/environment.yml
 create mode 100644 modules/local/scdblfinder/main.nf
 create mode 100644 modules/local/scdblfinder/meta.yml
 create mode 100644 modules/local/scdblfinder/tests/main.nf.test

diff --git a/modules/local/scdblfinder/environment.yml b/modules/local/scdblfinder/environment.yml
new file mode 100644
index 00000000..2dbc8004
--- /dev/null
+++ b/modules/local/scdblfinder/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  # TODO nf-core: List required Conda package(s).
+  #               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
+  #               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
+  - "bioconda::bioconductor-scdblfinder=1.24.0"
diff --git a/modules/local/scdblfinder/main.nf b/modules/local/scdblfinder/main.nf
new file mode 100644
index 00000000..8b093cd4
--- /dev/null
+++ b/modules/local/scdblfinder/main.nf
@@ -0,0 +1,83 @@
+// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
+//               https://github.com/nf-core/modules/tree/master/modules/nf-core/
+//               You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
+//               https://nf-co.re/join
+// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
+//               All other parameters MUST be provided using the "task.ext" directive, see here:
+//               https://www.nextflow.io/docs/latest/process.html#ext
+//               where "task.ext" is a string.
+//               Any parameters that need to be evaluated in the context of a particular sample
+//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
+// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
+//               unless there is a run-time, storage advantage in implementing in this way
+//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
+//                 bwa mem | samtools view -B -T ref.fasta
+// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
+//               list (`[]`) instead of a file can be used to work around this issue.
+
+process SCDBLFINDER {
+    tag "$meta.id"
+    label 'process_medium'
+
+    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
+        'biocontainers/YOUR-TOOL-HERE' }"
+
+    input:// TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
+    //               MUST be provided as an input via a Groovy Map called "meta".
+    //               This information may not be required in some instances e.g. indexing reference genome files:
+    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
+    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
+    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
+    tuple val(meta), path(bam)
+
+    output:
+    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
+    tuple val(meta), path("*.bam"), emit: bam
+    // TODO nf-core: List additional required output channels/values here
+    // TODO nf-core: Update the command here to obtain the version number of the software used in this module
+    // TODO nf-core: If multiple software packages are used in this module, all MUST be added here
+    //               by copying the line below and replacing the current tool with the extra tool(s)
+    tuple val("${task.process}"), val('scdblfinder'), eval("scdblfinder --version"), topic: versions, emit: versions_scdblfinder
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
+    //               If the software is unable to output a version number on the command-line then it can be manually specified
+    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
+    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
+    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
+    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
+    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
+    // TODO nf-core: Please replace the example samtools command below with your module's command
+    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
+    """
+    scdblfinder \\
+        $args \\
+        -@ $task.cpus \\
+        -o ${prefix}.bam \\
+        $bam
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
+    //               Have a look at the following examples:
+    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
+    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
+    // TODO nf-core: If the module doesn't use arguments ($args), you SHOULD remove:
+    //               - The definition of args `def args = task.ext.args ?: ''` above.
+    //               - The use of the variable in the script `echo $args ` below.
+    """
+    echo $args
+    
+    touch ${prefix}.bam
+    """
+}
diff --git a/modules/local/scdblfinder/meta.yml b/modules/local/scdblfinder/meta.yml
new file mode 100644
index 00000000..b621994a
--- /dev/null
+++ b/modules/local/scdblfinder/meta.yml
@@ -0,0 +1,77 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+# # TODO nf-core: Add a description of the module and list keywords
+name: "scdblfinder"
+description: write your description here
+keywords:
+  - sort
+  - example
+  - genomics
+tools:
+  ## TODO nf-core: Add a description and other details for the software below
+  - "scdblfinder":
+      description: "scDblFinder"
+      homepage: "None"
+      documentation: "None"
+      tool_dev_url: "None"
+      doi: ""
+      licence: ["GPL v3 + file LICENSE"]
+      identifier: null
+
+input:
+  ### TODO nf-core: Add a description of all of the variables used as input
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - bam:
+        type: file
+        description: Sorted BAM/CRAM/SAM file
+        pattern: "*.{bam,cram,sam}"
+        ontologies:
+          - edam: "http://edamontology.org/format_2572" # BAM
+          - edam: "http://edamontology.org/format_2573" # CRAM
+          - edam: "http://edamontology.org/format_3462" # SAM
+
+output:
+  ### TODO nf-core: Add a description of all of the variables used as output
+  bam:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.bam":
+          type: file
+          description: Sorted BAM/CRAM/SAM file
+          pattern: "*.{bam,cram,sam}"
+          ontologies:
+            - edam: "http://edamontology.org/format_2572" # BAM
+            - edam: "http://edamontology.org/format_2573" # CRAM
+            - edam: "http://edamontology.org/format_3462" # SAM
+  versions_scdblfinder:
+    - - "${task.process}":
+          type: string
+          description: The name of the process
+      - "scdblfinder":
+          type: string
+          description: The name of the tool
+      - "scdblfinder --version":
+          type: eval
+          description: The expression to obtain the version of the tool
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - scdblfinder:
+          type: string
+          description: The name of the tool
+      - scdblfinder --version:
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@KurayiChawatama"
+maintainers:
+  - "@KurayiChawatama"
diff --git a/modules/local/scdblfinder/tests/main.nf.test b/modules/local/scdblfinder/tests/main.nf.test
new file mode 100644
index 00000000..c4625e94
--- /dev/null
+++ b/modules/local/scdblfinder/tests/main.nf.test
@@ -0,0 +1,78 @@
+// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
+// nf-core modules test scdblfinder
+nextflow_process {
+
+    name "Test Process SCDBLFINDER"
+    script "../main.nf"
+    process "SCDBLFINDER"
+
+    tag "modules"
+    tag "modules_"
+    tag "scdblfinder"
+
+    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
+    test("sarscov2 - bam") {
+
+        // TODO nf-core: If you are created a test for a chained module
+        // (the module requires running more than one process to generate the required output)
+        // add the 'setup' method here.
+        // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules).
+
+        when {
+            process {
+                """
+                // TODO nf-core: define inputs of the process here. Example:
+                
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(
+                    process.out,
+                    path(process.out.versions[0]).yaml
+                ).match() }
+                //TODO nf-core: Add all required assertions to verify the test output.
+                // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples.
+            )
+        }
+
+    }
+
+    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix.
+    test("sarscov2 - bam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                // TODO nf-core: define inputs of the process here. Example:
+                
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assert process.success
+            assertAll(
+                { assert snapshot(
+                    process.out,
+                    path(process.out.versions[0]).yaml
+                ).match() }
+            )
+        }
+
+    }
+
+}

From 0c1c2ee6fb38f752164008b47cad27c4b1d13fce Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 11:09:07 +0300
Subject: [PATCH 02/28] Fix scdblfinder: remove mockDoubletSCE and use real SCE
 object directly

---
 modules/local/scdblfinder/environment.yml     |  7 +-
 modules/local/scdblfinder/main.nf             | 60 +++---------
 modules/local/scdblfinder/meta.yml            | 89 ++++++++---------
 .../local/scdblfinder/templates/scdblfinder.R | 95 +++++++++++++++++++
 modules/local/scdblfinder/tests/main.nf.test  | 38 +++-----
 5 files changed, 164 insertions(+), 125 deletions(-)
 create mode 100644 modules/local/scdblfinder/templates/scdblfinder.R

diff --git a/modules/local/scdblfinder/environment.yml b/modules/local/scdblfinder/environment.yml
index 2dbc8004..509ce20c 100644
--- a/modules/local/scdblfinder/environment.yml
+++ b/modules/local/scdblfinder/environment.yml
@@ -4,7 +4,8 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  # TODO nf-core: List required Conda package(s).
-  #               Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
-  #               For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems.
   - "bioconda::bioconductor-scdblfinder=1.24.0"
+  - "bioconda::bioconductor-singlecellexperiment=1.26.0"
+  - "bioconda::bioconductor-biocparallel=1.38.0"
+  - "conda-forge::r-anndatar=0.3.2"
+  - "conda-forge::r-tidyverse=2.0.0"
diff --git a/modules/local/scdblfinder/main.nf b/modules/local/scdblfinder/main.nf
index 8b093cd4..a0903896 100644
--- a/modules/local/scdblfinder/main.nf
+++ b/modules/local/scdblfinder/main.nf
@@ -19,65 +19,31 @@ process SCDBLFINDER {
     tag "$meta.id"
     label 'process_medium'
 
-    // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below.
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE':
-        'biocontainers/YOUR-TOOL-HERE' }"
+        'https://depot.galaxyproject.org/singularity/mulled-v2-d8c5d0c7834f29eb8adde3fe8c4e9b6fbf89db2f:9fecf4e535ec29b85ab3c03bd26e5cca8e7d29a9-0' :
+        'quay.io/biocontainers/mulled-v2-d8c5d0c7834f29eb8adde3fe8c4e9b6fbf89db2f:9fecf4e535ec29b85ab3c03bd26e5cca8e7d29a9-0' }"
 
-    input:// TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group"
-    //               MUST be provided as an input via a Groovy Map called "meta".
-    //               This information may not be required in some instances e.g. indexing reference genome files:
-    //               https://github.com/nf-core/modules/blob/master/modules/nf-core/bwa/index/main.nf
-    // TODO nf-core: Where applicable please provide/convert compressed files as input/output
-    //               e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc.
-    tuple val(meta), path(bam)
+    input:
+    tuple val(meta), path(h5ad)
 
     output:
-    // TODO nf-core: Named file extensions MUST be emitted for ALL output channels
-    tuple val(meta), path("*.bam"), emit: bam
-    // TODO nf-core: List additional required output channels/values here
-    // TODO nf-core: Update the command here to obtain the version number of the software used in this module
-    // TODO nf-core: If multiple software packages are used in this module, all MUST be added here
-    //               by copying the line below and replacing the current tool with the extra tool(s)
-    tuple val("${task.process}"), val('scdblfinder'), eval("scdblfinder --version"), topic: versions, emit: versions_scdblfinder
+    tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
+    tuple val(meta), path("${prefix}.csv"), emit: predictions
+    path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10
-    //               If the software is unable to output a version number on the command-line then it can be manually specified
-    //               e.g. https://github.com/nf-core/modules/blob/master/modules/nf-core/homer/annotatepeaks/main.nf
-    //               Each software used MUST provide the software name and version number in the YAML version file (versions.yml)
-    // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive
-    // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter
-    //               using the Nextflow "task" variable e.g. "--threads $task.cpus"
-    // TODO nf-core: Please replace the example samtools command below with your module's command
-    // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;)
-    """
-    scdblfinder \\
-        $args \\
-        -@ $task.cpus \\
-        -o ${prefix}.bam \\
-        $bam
-    """
+    prefix = task.ext.prefix ?: "${meta.id}"
+    template('scdblfinder.R')
 
     stub:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    // TODO nf-core: A stub section should mimic the execution of the original module as best as possible
-    //               Have a look at the following examples:
-    //               Simple example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bcftools/annotate/main.nf#L47-L63
-    //               Complex example: https://github.com/nf-core/modules/blob/818474a292b4860ae8ff88e149fbcda68814114d/modules/nf-core/bedtools/split/main.nf#L38-L54
-    // TODO nf-core: If the module doesn't use arguments ($args), you SHOULD remove:
-    //               - The definition of args `def args = task.ext.args ?: ''` above.
-    //               - The use of the variable in the script `echo $args ` below.
+    prefix = task.ext.prefix ?: "${meta.id}"
     """
-    echo $args
-    
-    touch ${prefix}.bam
+    touch ${prefix}.h5ad
+    touch ${prefix}.csv
+    touch versions.yml
     """
 }
diff --git a/modules/local/scdblfinder/meta.yml b/modules/local/scdblfinder/meta.yml
index b621994a..367e73c5 100644
--- a/modules/local/scdblfinder/meta.yml
+++ b/modules/local/scdblfinder/meta.yml
@@ -1,76 +1,67 @@
 # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-# # TODO nf-core: Add a description of the module and list keywords
 name: "scdblfinder"
-description: write your description here
+description: Detect doublets in single-cell RNA-seq data using scDblFinder
 keywords:
-  - sort
-  - example
-  - genomics
+  - doublet-detection
+  - single-cell
+  - scrnaseq
+  - quality-control
 tools:
-  ## TODO nf-core: Add a description and other details for the software below
   - "scdblfinder":
-      description: "scDblFinder"
-      homepage: "None"
-      documentation: "None"
-      tool_dev_url: "None"
-      doi: ""
-      licence: ["GPL v3 + file LICENSE"]
-      identifier: null
+      description: "scDblFinder: Computational identification of doublets in single-cell transcriptomics data"
+      homepage: "https://bioconductor.org/packages/scDblFinder"
+      documentation: "https://bioconductor.org/packages/release/bioc/vignettes/scDblFinder/inst/doc/scDblFinder.html"
+      tool_dev_url: "https://github.com/plger/scDblFinder"
+      doi: "10.12688/f1000research.73600.2"
+      licence: ["GPL-3.0"]
+      identifier: biotools:scdblfinder
 
 input:
-  ### TODO nf-core: Add a description of all of the variables used as input
   - - meta:
         type: map
         description: |
           Groovy Map containing sample information
           e.g. `[ id:'sample1' ]`
-    - bam:
+    - h5ad:
         type: file
-        description: Sorted BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
+        description: AnnData object in h5ad format
+        pattern: "*.{h5ad}"
         ontologies:
-          - edam: "http://edamontology.org/format_2572" # BAM
-          - edam: "http://edamontology.org/format_2573" # CRAM
-          - edam: "http://edamontology.org/format_3462" # SAM
+          - edam: "http://edamontology.org/format_3590" # HDF5 format
 
 output:
-  ### TODO nf-core: Add a description of all of the variables used as output
-  bam:
+  h5ad:
     - - meta:
           type: map
           description: |
             Groovy Map containing sample information
             e.g. `[ id:'sample1' ]`
-      - "*.bam":
+      - "*.h5ad":
           type: file
-          description: Sorted BAM/CRAM/SAM file
-          pattern: "*.{bam,cram,sam}"
+          description: AnnData object with doublet annotations
+          pattern: "*.h5ad"
           ontologies:
-            - edam: "http://edamontology.org/format_2572" # BAM
-            - edam: "http://edamontology.org/format_2573" # CRAM
-            - edam: "http://edamontology.org/format_3462" # SAM
-  versions_scdblfinder:
-    - - "${task.process}":
-          type: string
-          description: The name of the process
-      - "scdblfinder":
-          type: string
-          description: The name of the tool
-      - "scdblfinder --version":
-          type: eval
-          description: The expression to obtain the version of the tool
-
-topics:
+            - edam: "http://edamontology.org/format_3590" # HDF5 format
+  predictions:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.csv":
+          type: file
+          description: CSV file containing doublet predictions (boolean)
+          pattern: "*.csv"
+          ontologies:
+            - edam: "http://edamontology.org/format_3752" # CSV
   versions:
-    - - ${task.process}:
-          type: string
-          description: The name of the process
-      - scdblfinder:
-          type: string
-          description: The name of the tool
-      - scdblfinder --version:
-          type: eval
-          description: The expression to obtain the version of the tool
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+
 authors:
   - "@KurayiChawatama"
 maintainers:
diff --git a/modules/local/scdblfinder/templates/scdblfinder.R b/modules/local/scdblfinder/templates/scdblfinder.R
new file mode 100644
index 00000000..8f83ec99
--- /dev/null
+++ b/modules/local/scdblfinder/templates/scdblfinder.R
@@ -0,0 +1,95 @@
+#!/usr/bin/env Rscript
+
+library(scDblFinder)
+library(tidyverse)
+library(SingleCellExperiment)
+library(BiocParallel)
+library(anndataR)
+
+adata <- read_h5ad("${h5ad}")
+sce <- adata$as_SingleCellExperiment()
+
+# Fixed RNG seed for reproducibility; workers follow the CPUs Nextflow allocated
+bp <- MulticoreParam(workers = ${task.cpus}, RNGseed = 123)
+
+
+# 10x Genomics multiplet rate table, used to derive the expected doublet rate
+# from the cell count (https://rpubs.com/kenneditodd/doublet_finder_example)
+multiplet_rates_10x <- data.frame(
+  "Multiplet_rate" = c(0.004, 0.008, 0.0160, 0.023, 0.031,
+                        0.039, 0.046, 0.054, 0.061, 0.069, 0.076),
+  "Loaded_cells" = c(800, 1600, 3200, 4800, 6400, 8000, 9600,
+                     11200, 12800, 14400, 16000),
+  "Recovered_cells" = c(500, 1000, 2000, 3000, 4000, 5000, 6000,
+                        7000, 8000, 9000, 10000)
+)
+
+# Pick the largest tier with fewer recovered cells than the SCE object holds.
+# Samples at or below the smallest tier (500 cells) use the lowest rate rather
+# than an empty selection, which would pass a zero-length `dbr` to scDblFinder.
+recovered <- multiplet_rates_10x\$Recovered_cells
+tier <- max(1L, findInterval(ncol(sce), recovered, left.open = TRUE))
+multiplet_rate <- multiplet_rates_10x\$Multiplet_rate[tier]
+
+message(paste0("Setting multiplet rate to ", multiplet_rate, " for ", ncol(sce), " cells"))
+
+# Run scDblFinder on the REAL data (not mock data!)
+# scDblFinder creates artificial doublets internally
+set.seed(123)
+sce <- scDblFinder(
+    sce,
+    BPPARAM = bp,
+    dbr = multiplet_rate,
+    artificialDoublets = ncol(sce)
+)
+
+# Generate a summary table
+message("scDblFinder results summary:")
+print(table(sce\$scDblFinder.class))
+
+# Rename scDblFinder.* columns to scdblfinder_* for consistency with other
+# doublet methods. Strip the prefix BEFORE replacing dots: the reverse order
+# leaves the prefix unmatched ("scdblfinder_scDblFinder_class").
+scdbl_cols <- grep("^scDblFinder\\\\.", colnames(colData(sce)), value = TRUE)
+new_scdbl_cols <- paste0("scdblfinder_", gsub("\\\\.", "_", sub("^scDblFinder\\\\.", "", scdbl_cols)))
+for (i in seq_along(scdbl_cols)) {
+  colData(sce)[[new_scdbl_cols[i]]] <- colData(sce)[[scdbl_cols[i]]]
+  colData(sce)[[scdbl_cols[i]]] <- NULL  # Remove the original column
+}
+
+# Convert back to AnnData and save
+adata_processed <- as_AnnData(sce)
+write_h5ad(adata_processed, "${prefix}.h5ad")
+
+# Extract predictions for doublet removal step
+# Create a binary doublet call based on class
+predictions <- data.frame(
+    doublet = colData(sce)\$scdblfinder_class == "doublet",
+    row.names = colnames(sce)
+)
+colnames(predictions) <- "${prefix}"
+
+# Save predictions to CSV
+write.csv(predictions, "${prefix}.csv")
+
+################################################
+################################################
+## VERSIONS FILE                              ##
+################################################
+################################################
+
+r.version <- strsplit(version[['version.string']], ' ')[[1]][3]
+scDblFinder.version <- as.character(packageVersion('scDblFinder'))
+
+writeLines(
+    c(
+        '"${task.process}":',
+        paste('    R:', r.version),
+        paste('    scDblFinder:', scDblFinder.version)
+    ),
+'versions.yml')
+
+################################################
+################################################
+################################################
+################################################
diff --git a/modules/local/scdblfinder/tests/main.nf.test b/modules/local/scdblfinder/tests/main.nf.test
index c4625e94..c0168559 100644
--- a/modules/local/scdblfinder/tests/main.nf.test
+++ b/modules/local/scdblfinder/tests/main.nf.test
@@ -1,5 +1,3 @@
-// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
-// nf-core modules test scdblfinder
 nextflow_process {
 
     name "Test Process SCDBLFINDER"
@@ -7,25 +5,17 @@ nextflow_process {
     process "SCDBLFINDER"
 
     tag "modules"
-    tag "modules_"
+    tag "modules_local"
     tag "scdblfinder"
 
-    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
-    test("sarscov2 - bam") {
-
-        // TODO nf-core: If you are created a test for a chained module
-        // (the module requires running more than one process to generate the required output)
-        // add the 'setup' method here.
-        // You can find more information about how to use a 'setup' method in the docs (https://nf-co.re/docs/contributing/modules#steps-for-creating-nf-test-for-chained-modules).
+    test("homo_sapiens - h5ad") {
 
         when {
             process {
                 """
-                // TODO nf-core: define inputs of the process here. Example:
-                
                 input[0] = [
                     [ id:'test' ],
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
                 ]
                 """
             }
@@ -35,29 +25,28 @@ nextflow_process {
             assert process.success
             assertAll(
                 { assert snapshot(
-                    process.out,
-                    path(process.out.versions[0]).yaml
+                    process.out.versions,
+                    process.out.predictions,
+                    // Hashing does not work due to this issue:
+                    // https://github.com/scverse/anndataR/issues/272
+                    file(process.out.h5ad.get(0).get(1)).exists(),
+                    file(process.out.h5ad.get(0).get(1)).size()
                 ).match() }
-                //TODO nf-core: Add all required assertions to verify the test output.
-                // See https://nf-co.re/docs/contributing/tutorials/nf-test_assertions for more information and examples.
             )
         }
 
     }
 
-    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used but keep the " - stub" suffix.
-    test("sarscov2 - bam - stub") {
+    test("homo_sapiens - h5ad - stub") {
 
         options "-stub"
 
         when {
             process {
                 """
-                // TODO nf-core: define inputs of the process here. Example:
-                
                 input[0] = [
                     [ id:'test' ],
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
                 ]
                 """
             }
@@ -66,10 +55,7 @@ nextflow_process {
         then {
             assert process.success
             assertAll(
-                { assert snapshot(
-                    process.out,
-                    path(process.out.versions[0]).yaml
-                ).match() }
+                { assert snapshot(process.out).match() }
             )
         }
 

From 8ba85f78cbf888f35c237c3888e307f07d5867cb Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 11:21:20 +0300
Subject: [PATCH 03/28] Integrate scdblfinder into pipeline configuration and
 tests

---
 conf/modules.config                          | 10 ++++++++++
 conf/test.config                             |  2 +-
 conf/test_full.config                        |  2 +-
 nextflow_schema.json                         |  4 ++--
 subworkflows/local/doublet_detection/main.nf |  9 +++++++++
 5 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index beae2c2f..0504d6db 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -213,6 +213,16 @@ process {
         ]
     }
 
+    withName: SCDBLFINDER {
+        ext.prefix = { meta.id + '_scdblfinder' }
+        publishDir = [
+            path: { "${params.outdir}/quality_control/doublet_detection/scdblfinder" },
+            mode: params.publish_dir_mode,
+            enabled: params.save_intermediates,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+        ]
+    }
+
     withName: DOUBLET_REMOVAL {
         publishDir = [
             path: { "${params.outdir}/quality_control/doublet_detection" },
diff --git a/conf/test.config b/conf/test.config
index 189363f9..c58705ae 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -25,7 +25,7 @@ params {
     // Input data
     input               = params.pipelines_testdata_base_path + 'samplesheet.csv'
     integration_methods = 'scvi,harmony,bbknn,combat'
-    doublet_detection   = 'solo,scrublet,scds'
+    doublet_detection   = 'solo,scrublet,scds,scdblfinder'
     celltypist_model    = 'Adult_Human_Skin'
     celldex_reference   = 'https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/singleR/references.csv'
     integration_hvgs    = 500
diff --git a/conf/test_full.config b/conf/test_full.config
index 8262e5bf..7d64f3f0 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -25,7 +25,7 @@ params {
     // Input data for full size test
     input               = params.pipelines_testdata_base_path + 'samplesheet.csv'
     integration_methods = 'scvi,harmony,bbknn,combat'
-    doublet_detection   = 'solo,scrublet,doubletdetection,scds'
+    doublet_detection   = 'solo,scrublet,doubletdetection,scds,scdblfinder'
     celltypist_model    = 'Adult_Human_Skin'
     celldex_reference   = 'hpca__2024-02-26,monaco_immune__2024-02-26' // Feature: Support offline.
     celldex_reference_label = 'label.main,label.fine'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5157e224..2233d8a6 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -105,8 +105,8 @@
                     "type": "string",
                     "default": "scrublet",
                     "description": "Specify the tools to use for doublet detection. Setting to 'none' will skip this step",
-                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: solo, scrublet, doubletdetection, scds",
-                    "pattern": "^(none|((solo|scrublet|doubletdetection|scds)?,?)*[^,]+$)"
+                    "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: solo, scrublet, doubletdetection, scds, scdblfinder",
+                    "pattern": "^(none|((solo|scrublet|doubletdetection|scds|scdblfinder)?,?)*[^,]+$)"
                 },
                 "doublet_detection_threshold": {
                     "type": "integer",
diff --git a/subworkflows/local/doublet_detection/main.nf b/subworkflows/local/doublet_detection/main.nf
index c7371ff3..110a3c4a 100644
--- a/subworkflows/local/doublet_detection/main.nf
+++ b/subworkflows/local/doublet_detection/main.nf
@@ -2,6 +2,7 @@ include { SCVITOOLS_SOLO   } from '../../../modules/nf-core/scvitools/solo'
 include { SCANPY_SCRUBLET  } from '../../../modules/nf-core/scanpy/scrublet'
 include { DOUBLETDETECTION } from '../../../modules/nf-core/doubletdetection'
 include { SCDS             } from '../../../modules/local/doublet_detection/scds'
+include { SCDBLFINDER      } from '../../../modules/local/scdblfinder'
 include { DOUBLET_REMOVAL  } from '../../../modules/local/doublet_detection/doublet_removal'
 
 workflow DOUBLET_DETECTION {
@@ -56,6 +57,14 @@ workflow DOUBLET_DETECTION {
             ch_versions = DOUBLETDETECTION.out.versions
         }
 
+        if (methods.contains('scdblfinder')) {
+            SCDBLFINDER (
+                ch_h5ad
+            )
+            ch_predictions = ch_predictions.mix(SCDBLFINDER.out.predictions)
+            ch_versions = ch_versions.mix(SCDBLFINDER.out.versions)
+        }
+
         DOUBLET_REMOVAL (
             ch_h5ad.join(ch_predictions.groupTuple()),
             threshold,

From 5a109a5597e9749688b842bc7a3732e418950b1c Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 13:11:33 +0300
Subject: [PATCH 04/28] Fix scdblfinder module implementation and tests

---
 modules/local/scdblfinder/environment.yml     |  14 +-
 modules/local/scdblfinder/main.nf             |  23 +---
 .../local/scdblfinder/templates/scdblfinder.R |  46 +++++--
 modules/local/scdblfinder/tests/main.nf.test  |  55 ++++----
 .../local/scdblfinder/tests/main.nf.test.snap | 122 ++++++++++++++++++
 5 files changed, 197 insertions(+), 63 deletions(-)
 create mode 100644 modules/local/scdblfinder/tests/main.nf.test.snap

diff --git a/modules/local/scdblfinder/environment.yml b/modules/local/scdblfinder/environment.yml
index 509ce20c..ea37ba79 100644
--- a/modules/local/scdblfinder/environment.yml
+++ b/modules/local/scdblfinder/environment.yml
@@ -1,11 +1,11 @@
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: scdblfinder
 channels:
   - conda-forge
   - bioconda
 dependencies:
-  - "bioconda::bioconductor-scdblfinder=1.24.0"
-  - "bioconda::bioconductor-singlecellexperiment=1.26.0"
-  - "bioconda::bioconductor-biocparallel=1.38.0"
-  - "conda-forge::r-anndatar=0.3.2"
-  - "conda-forge::r-tidyverse=2.0.0"
+  - bioconda::bioconductor-scdblfinder=1.24.0
+  - bioconda::bioconductor-singlecellexperiment=1.32.0
+  - bioconda::bioconductor-biocparallel=1.44.0
+  - bioconda::bioconductor-anndatar=1.0.2
+  - bioconda::bioconductor-rhdf5=2.54.1
+  - conda-forge::r-tidyverse=2.0.0
\ No newline at end of file
diff --git a/modules/local/scdblfinder/main.nf b/modules/local/scdblfinder/main.nf
index a0903896..18727835 100644
--- a/modules/local/scdblfinder/main.nf
+++ b/modules/local/scdblfinder/main.nf
@@ -1,28 +1,11 @@
-// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :)
-//               https://github.com/nf-core/modules/tree/master/modules/nf-core/
-//               You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace:
-//               https://nf-co.re/join
-// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
-//               All other parameters MUST be provided using the "task.ext" directive, see here:
-//               https://www.nextflow.io/docs/latest/process.html#ext
-//               where "task.ext" is a string.
-//               Any parameters that need to be evaluated in the context of a particular sample
-//               e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.
-// TODO nf-core: Software that can be piped together SHOULD be added to separate module files
-//               unless there is a run-time, storage advantage in implementing in this way
-//               e.g. it's ok to have a single module for bwa to output BAM instead of SAM:
-//                 bwa mem | samtools view -B -T ref.fasta
-// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty
-//               list (`[]`) instead of a file can be used to work around this issue.
-
 process SCDBLFINDER {
     tag "$meta.id"
-    label 'process_medium'
+    label 'process_low'
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-d8c5d0c7834f29eb8adde3fe8c4e9b6fbf89db2f:9fecf4e535ec29b85ab3c03bd26e5cca8e7d29a9-0' :
-        'quay.io/biocontainers/mulled-v2-d8c5d0c7834f29eb8adde3fe8c4e9b6fbf89db2f:9fecf4e535ec29b85ab3c03bd26e5cca8e7d29a9-0' }"
+        'oras://community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-biocparallel_bioconductor-rhdf5_bioconductor-scdblfinder_pruned:28444625ead66428' :
+        'community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-biocparallel_bioconductor-rhdf5_bioconductor-scdblfinder_pruned:0f9db6b0855861de' }"
 
     input:
     tuple val(meta), path(h5ad)
diff --git a/modules/local/scdblfinder/templates/scdblfinder.R b/modules/local/scdblfinder/templates/scdblfinder.R
index 8f83ec99..48e3b1a2 100644
--- a/modules/local/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/scdblfinder/templates/scdblfinder.R
@@ -7,7 +7,7 @@ library(BiocParallel)
 library(anndataR)
 
 adata <- read_h5ad("${h5ad}")
-sce <- adata$as_SingleCellExperiment()
+sce <- adata\$as_SingleCellExperiment()
 
 # Set the param to a specified RNG seed for reproducibility
 bp <- MulticoreParam(workers = multicoreWorkers(), RNGseed=123)
@@ -33,28 +33,42 @@ multiplet_rate <- multiplet_rates_10x %>%
 
 message(paste0("Setting multiplet rate to ", multiplet_rate, " for ", ncol(sce), " cells"))
 
-# Run scDblFinder on the REAL data (not mock data!)
-# scDblFinder creates artificial doublets internally
+# Save original cell names and count before overwriting sce
+original_cell_names <- colnames(sce)
+n_cells <- ncol(sce)
+
+# Run scDblFinder on the counts matrix (first assay)
+# scDblFinder creates artificial doublets internally and returns a new SCE
 set.seed(123)
 sce <- scDblFinder(
-    sce,
+    assays(sce)[[1]],
     BPPARAM = bp,
     dbr = multiplet_rate,
-    artificialDoublets = ncol(sce)
+    artificialDoublets = n_cells
 )
 
+# Restore original cell names
+if (!is.null(original_cell_names) && length(original_cell_names) == ncol(sce)) {
+    colnames(sce) <- original_cell_names
+}
+
 # Generate a summary table
 message("scDblFinder results summary:")
 print(table(sce\$scDblFinder.class))
 
 # Rename scDblFinder.* columns for consistency with other doublet methods
 scdbl_cols <- grep("^scDblFinder\\\\.", colnames(colData(sce)), value = TRUE)
-new_scdbl_cols <- paste0("scdblfinder_", gsub("^scDblFinder\\\\.", "", gsub("\\\\.", "_", scdbl_cols)))
 
-# Rename columns in colData(sce)
+# First remove "scDblFinder." prefix, THEN replace remaining dots with underscores
+new_scdbl_cols <- paste0("scdblfinder_", gsub("\\\\.", "_", gsub("^scDblFinder\\\\.", "", scdbl_cols)))
+
+# Rename columns in colData(sce) - create new columns first, then delete old ones
 for (i in seq_along(scdbl_cols)) {
   colData(sce)[[new_scdbl_cols[i]]] <- colData(sce)[[scdbl_cols[i]]]
-  colData(sce)[[scdbl_cols[i]]] <- NULL  # Remove the original column
+}
+# Now delete old columns
+for (col in scdbl_cols) {
+  colData(sce)[[col]] <- NULL
 }
 
 # Convert back to AnnData and save
@@ -63,10 +77,18 @@ write_h5ad(adata_processed, "${prefix}.h5ad")
 
 # Extract predictions for doublet removal step
 # Create a binary doublet call based on class
-predictions <- data.frame(
-    doublet = colData(sce)\$scdblfinder_class == "doublet",
-    row.names = colnames(sce)
-)
+# Ensure we have valid row names
+if (is.null(colnames(sce)) || length(colnames(sce)) != ncol(sce)) {
+    colnames(sce) <- paste0("cell_", seq_len(ncol(sce)))
+}
+
+# Create predictions vector
+doublet_calls <- colData(sce)\$scdblfinder_class == "doublet"
+
+# Create data frame without row.names first, then add them
+predictions <- data.frame(doublet = doublet_calls)
+row.names(predictions) <- colnames(sce)
+
 colnames(predictions) <- "${prefix}"
 
 # Save predictions to CSV
diff --git a/modules/local/scdblfinder/tests/main.nf.test b/modules/local/scdblfinder/tests/main.nf.test
index c0168559..1ed6f7ec 100644
--- a/modules/local/scdblfinder/tests/main.nf.test
+++ b/modules/local/scdblfinder/tests/main.nf.test
@@ -1,61 +1,68 @@
 nextflow_process {
 
     name "Test Process SCDBLFINDER"
-    script "../main.nf"
+    script "modules/local/scdblfinder/main.nf"
     process "SCDBLFINDER"
 
     tag "modules"
     tag "modules_local"
-    tag "scdblfinder"
 
-    test("homo_sapiens - h5ad") {
+    test("Should run without failures") {
 
         when {
+            params {
+                outdir = "$outputDir"
+            }
             process {
                 """
-                input[0] = [
-                    [ id:'test' ],
-                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
-                ]
+                input[0] = channel.of([
+                        [ id: 'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
                 """
             }
         }
 
         then {
-            assert process.success
             assertAll(
-                { assert snapshot(
-                    process.out.versions,
-                    process.out.predictions,
-                    // Hashing does not work due to this issue:
-                    // https://github.com/scverse/anndataR/issues/272
-                    file(process.out.h5ad.get(0).get(1)).exists(),
-                    file(process.out.h5ad.get(0).get(1)).size()
-                ).match() }
+            { assert process.success },
+            { assert snapshot(
+                process.out.versions,
+                process.out.predictions,
+                // Hashing does not work due to this issue:
+                // https://github.com/scverse/anndataR/issues/272
+                file(process.out.h5ad.get(0).get(1)).exists(),
+                file(process.out.h5ad.get(0).get(1)).size()
+            ).match() }
             )
         }
 
     }
 
-    test("homo_sapiens - h5ad - stub") {
+    test("Should run without failures - stub") {
 
-        options "-stub"
+        options '-stub'
 
         when {
+            params {
+                outdir = "$outputDir"
+            }
             process {
                 """
-                input[0] = [
-                    [ id:'test' ],
-                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
-                ]
+                input[0] = channel.of([
+                        [ id: 'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
+                    ]
+                )
                 """
             }
         }
 
         then {
-            assert process.success
             assertAll(
-                { assert snapshot(process.out).match() }
+            { assert process.success },
+            { assert snapshot(process.out).match() }
             )
         }
 
diff --git a/modules/local/scdblfinder/tests/main.nf.test.snap b/modules/local/scdblfinder/tests/main.nf.test.snap
new file mode 100644
index 00000000..721b5284
--- /dev/null
+++ b/modules/local/scdblfinder/tests/main.nf.test.snap
@@ -0,0 +1,122 @@
+{
+    "homo_sapiens - h5ad - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "predictions": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            }
+        ],
+        "timestamp": "2026-03-12T11:44:51.894263326",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Should run without failures - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "predictions": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test_scdblfinder.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            }
+        ],
+        "timestamp": "2026-03-12T12:34:23.397301125",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "Should run without failures": {
+        "content": [
+            [
+                "versions.yml:md5,ce056c78586769ad5433f5fbb86f70c7"
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_scdblfinder.csv:md5,130130ae215768e16e0df93a064dc5e9"
+                ]
+            ],
+            true,
+            5101352
+        ],
+        "timestamp": "2026-03-12T13:08:16.847676966",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file

From 0fb9d77ae5096b2f8fbc33274d073bb3397d2ffc Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 13:25:14 +0300
Subject: [PATCH 05/28] Update documentation to include scDblFinder

Also switch the SCDBLFINDER process label from process_low back to
process_medium.

---
 CHANGELOG.md                      | 1 +
 README.md                         | 1 +
 docs/output.md                    | 2 +-
 modules/local/scdblfinder/main.nf | 2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1ae737c6..888d9ccd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ Initial release of nf-core/scdownstream, created with the [nf-core](https://nf-c
 ### `Added`
 
 - Added `singleR` module for automated cell type annotation.
+- Added `scDblFinder` module for doublet detection.
 
 ### `Fixed`
 
diff --git a/README.md b/README.md
index 9eb07ccb..6edbd11e 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,7 @@ Steps marked with the boat icon are not yet implemented. For the other steps, th
       - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)
       - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)
       - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)
+      - [scDblFinder](https://bioconductor.org/packages/release/bioc/html/scDblFinder.html)
 2. Sample aggregation
    1. Merge into a single h5ad file
    2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))
diff --git a/docs/output.md b/docs/output.md
index 27fad158..04ffcff4 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -57,7 +57,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
   - `custom_thresholds/`: Results of applying user-defined QC thresholds.
   - `doublet_detection/`: Directories related to doublet detection.
     - `input_rds/`: RDS version of the h5ad file that is used as input to the doublet detection tools.
-    - `(doubletdetection|scds|scrublet|solo)/`: Results of doublet detection. Each directory contains a filtered `h5ad`/`rds` and a `csv`/`pkl` file with the doublet annotations.
+    - `(doubletdetection|scdblfinder|scds|scrublet|solo)/`: Results of doublet detection. Each directory contains a filtered `h5ad`/`rds` and a `csv`/`pkl` file with the doublet annotations.
     - `${sample_id}.h5ad`: The h5ad without doublets.
   - `qc_preprocessed/`: QC plots for the preprocessed data.
 
diff --git a/modules/local/scdblfinder/main.nf b/modules/local/scdblfinder/main.nf
index 18727835..415593ed 100644
--- a/modules/local/scdblfinder/main.nf
+++ b/modules/local/scdblfinder/main.nf
@@ -1,6 +1,6 @@
 process SCDBLFINDER {
     tag "$meta.id"
-    label 'process_low'
+    label 'process_medium'
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

From 9af9d361e74d1af7462ad5c3d9cb671f2391392a Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 13:35:52 +0300
Subject: [PATCH 06/28] Add more documentation for scDblFinder

---
 ro-crate-metadata.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 0ce2b699..07662401 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "InProgress",
             "datePublished": "2025-11-20T09:32:29+00:00",
-            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-scdownstream_logo_dark.png\">\n    <img alt=\"nf-core/scdownstream\" src=\"docs/images/nf-core-scdownstream_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/scdownstream)\n[![GitHub Actions CI Status](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scdownstream/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scdownstream)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scdownstream-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scdownstream)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scdownstream** is a bioinformatics pipeline that can be used to process already quantified single-cell RNA-seq data. It takes a samplesheet and h5ad-, SingleCellExperiment/Seurat- or CSV files as input and performs quality control, integration, dimensionality reduction and clustering. It produces an integrated h5ad and SingleCellExperiment file and an extensive QC report.\n\nThe pipeline is based on the learnings and implementations from the following pipelines (alphabetical):\n\n- [panpipes](https://github.com/DendrouLab/panpipes)\n- [scFlow](https://combiz.github.io/scFlow/)\n- [scRAFIKI](https://github.com/Mye-InfoBank/scRAFIKI)\n- [YASCP](https://github.com/wtsi-hgi/yascp)\n\n# ![nf-core/scdownstream](docs/images/metromap.png)\n\nSteps marked with the boat icon are not yet implemented. For the other steps, the pipeline uses the following tools:\n\n1. Per-sample preprocessing\n   1. Convert all RDS files to h5ad format\n   2. Create filtered matrix (if not provided)\n   3. 
Present QC for raw counts ([`MultiQC`](http://multiqc.info/))\n   4. Remove ambient RNA\n      - [decontX](https://bioconductor.org/packages/release/bioc/html/decontX.html)\n      - [soupX](https://cran.r-project.org/web/packages/SoupX/readme/README.html)\n      - [cellbender](https://cellbender.readthedocs.io/en/latest/)\n      - [scAR](https://docs.scvi-tools.org/en/stable/user_guide/models/scar.html)\n   5. Apply user-defined QC filters (can be defined per sample in the samplesheet)\n   6. Doublet detection (Majority vote possible)\n      - [SOLO](https://docs.scvi-tools.org/en/stable/user_guide/models/solo.html)\n      - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)\n      - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)\n      - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)\n2. Sample aggregation\n   1. Merge into a single h5ad file\n   2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))\n   3. Integration\n      - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)\n      - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)\n      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)\n      - [BBKNN](https://github.com/Teichlab/bbknn)\n      - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)\n      - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)\n3. Cell type annotation\n   - [celltypist](https://www.celltypist.org/)\n4. Clustering and dimensionality reduction\n   1. [Leiden clustering](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html)\n   2. [UMAP](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html)\n5. 
Create report ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n> [!NOTE]\n> If you are confused by the terms `filtered` and `unfiltered`, please check out the respective [documentation](https://nf-co.re/scdownstream/dev/docs/usage/#filtered-and-unfiltered-matrices).\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,unfiltered\nsample1,/absolute/path/to/sample1.h5ad\nsample2,/absolute/path/to/sample3.h5\nsample3,relative/path/to/sample2.rds\nsample4,/absolute/path/to/sample3.csv\n```\n\nEach entry represents a h5ad, h5, RDS or CSV file. RDS files may contain any object that can be converted to a SingleCellExperiment using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function.\nCSV files should contain a matrix with genes as columns and cells as rows. The first column should contain cell names/barcodes.\n\n-->\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/scdownstream \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scdownstream/usage) and the [parameter documentation](https://nf-co.re/scdownstream/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scdownstream/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/scdownstream/output).\n\n## Credits\n\nnf-core/scdownstream was originally written by [Nico Trummer](https://github.com/nictru).\n\nWe thank the following people for their extensive assistance in the development of this pipeline (alphabetical):\n\n- [Fabian Rost](https://github.com/fbnrst)\n- [Fabiola Curion](https://github.com/bio-la)\n- [Gregor Sturm](https://github.com/grst)\n- [Jonathan Talbot-Martin](https://github.com/jtalbotmartin)\n- [Lukas Heumos](https://github.com/zethson)\n- [Matiss Ozols](https://github.com/maxozo)\n- [Nathan Skene](https://github.com/NathanSkene)\n- [Nurun Fancy](https://github.com/nfancy)\n- [Riley Grindle](https://github.com/Riley-Grindle)\n- [Ryan Seaman](https://github.com/RPSeaman)\n- [Steffen M\u00f6ller](https://github.com/smoe)\n- [Wojtek Sowinski](https://github.com/WojtekSowinski)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scdownstream` channel](https://nfcore.slack.com/channels/scdownstream) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n<!-- TODO nf-core: Add 
citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->\n<!-- If you use nf-core/scdownstream for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->\n\n<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-scdownstream_logo_dark.png\">\n    <img alt=\"nf-core/scdownstream\" src=\"docs/images/nf-core-scdownstream_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/scdownstream)\n[![GitHub Actions CI Status](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scdownstream/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scdownstream)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scdownstream-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scdownstream)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scdownstream** is a bioinformatics pipeline that can be used to process already quantified single-cell RNA-seq data. It takes a samplesheet and h5ad-, SingleCellExperiment/Seurat- or CSV files as input and performs quality control, integration, dimensionality reduction and clustering. It produces an integrated h5ad and SingleCellExperiment file and an extensive QC report.\n\nThe pipeline is based on the learnings and implementations from the following pipelines (alphabetical):\n\n- [panpipes](https://github.com/DendrouLab/panpipes)\n- [scFlow](https://combiz.github.io/scFlow/)\n- [scRAFIKI](https://github.com/Mye-InfoBank/scRAFIKI)\n- [YASCP](https://github.com/wtsi-hgi/yascp)\n\n# ![nf-core/scdownstream](docs/images/metromap.png)\n\nSteps marked with the boat icon are not yet implemented. For the other steps, the pipeline uses the following tools:\n\n1. Per-sample preprocessing\n   1. Convert all RDS files to h5ad format\n   2. Create filtered matrix (if not provided)\n   3. 
Present QC for raw counts ([`MultiQC`](http://multiqc.info/))\n   4. Remove ambient RNA\n      - [decontX](https://bioconductor.org/packages/release/bioc/html/decontX.html)\n      - [soupX](https://cran.r-project.org/web/packages/SoupX/readme/README.html)\n      - [cellbender](https://cellbender.readthedocs.io/en/latest/)\n      - [scAR](https://docs.scvi-tools.org/en/stable/user_guide/models/scar.html)\n   5. Apply user-defined QC filters (can be defined per sample in the samplesheet)\n   6. Doublet detection (Majority vote possible)\n      - [SOLO](https://docs.scvi-tools.org/en/stable/user_guide/models/solo.html)\n      - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)\n      - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)\n      - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)\n      - [scDblFinder](https://bioconductor.org/packages/release/bioc/html/scDblFinder.html)\n2. Sample aggregation\n   1. Merge into a single h5ad file\n   2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))\n   3. Integration\n      - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)\n      - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)\n      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)\n      - [BBKNN](https://github.com/Teichlab/bbknn)\n      - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)\n      - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)\n3. Cell type annotation\n   - [celltypist](https://www.celltypist.org/)\n4. Clustering and dimensionality reduction\n   1. [Leiden clustering](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html)\n   2. [UMAP](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html)\n5. 
Create report ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n> [!NOTE]\n> If you are confused by the terms `filtered` and `unfiltered`, please check out the respective [documentation](https://nf-co.re/scdownstream/dev/docs/usage/#filtered-and-unfiltered-matrices).\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,unfiltered\nsample1,/absolute/path/to/sample1.h5ad\nsample2,/absolute/path/to/sample3.h5\nsample3,relative/path/to/sample2.rds\nsample4,/absolute/path/to/sample3.csv\n```\n\nEach entry represents a h5ad, h5, RDS or CSV file. RDS files may contain any object that can be converted to a SingleCellExperiment using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function.\nCSV files should contain a matrix with genes as columns and cells as rows. The first column should contain cell names/barcodes.\n\n-->\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/scdownstream \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scdownstream/usage) and the [parameter documentation](https://nf-co.re/scdownstream/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scdownstream/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/scdownstream/output).\n\n## Credits\n\nnf-core/scdownstream was originally written by [Nico Trummer](https://github.com/nictru).\n\nWe thank the following people for their extensive assistance in the development of this pipeline (alphabetical):\n\n- [Fabian Rost](https://github.com/fbnrst)\n- [Fabiola Curion](https://github.com/bio-la)\n- [Gregor Sturm](https://github.com/grst)\n- [Jonathan Talbot-Martin](https://github.com/jtalbotmartin)\n- [Lukas Heumos](https://github.com/zethson)\n- [Matiss Ozols](https://github.com/maxozo)\n- [Nathan Skene](https://github.com/NathanSkene)\n- [Nurun Fancy](https://github.com/nfancy)\n- [Riley Grindle](https://github.com/Riley-Grindle)\n- [Ryan Seaman](https://github.com/RPSeaman)\n- [Steffen M\u00f6ller](https://github.com/smoe)\n- [Wojtek Sowinski](https://github.com/WojtekSowinski)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scdownstream` channel](https://nfcore.slack.com/channels/scdownstream) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n<!-- TODO nf-core: Add 
citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->\n<!-- If you use nf-core/scdownstream for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->\n\n<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"

From bd40dce9ce7f706abfd6d0be3f9ec9913076d754 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 14:31:57 +0300
Subject: [PATCH 07/28] added scdblfinder citation to citations md

---
 CITATIONS.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CITATIONS.md b/CITATIONS.md
index bee9e80f..5ce4742e 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -47,6 +47,10 @@
 
   > Cannoodt R, Zappia L, Morgan M, Deconinck L (2025). anndataR: AnnData interoperability in R. R package version 0.99.0
 
+- [scDblFinder](https://pubmed.ncbi.nlm.nih.gov/35118618/)
+
+  > Germain P, Lun A, Garcia Meixide C, Macnair W, Robinson M. Doublet identification in single-cell sequencing data using scDblFinder. F1000Res. 2022;11:979. doi: 10.12688/f1000research.73600.2.
+  
 ## Software packaging/containerisation tools
 
 - [Anaconda](https://anaconda.com)

From 807f00f277fff41a774e82fd238e534a9b9650ea Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 14:39:21 +0300
Subject: [PATCH 08/28] removed template comment from meta yml

---
 modules/local/scdblfinder/meta.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/local/scdblfinder/meta.yml b/modules/local/scdblfinder/meta.yml
index 367e73c5..33810388 100644
--- a/modules/local/scdblfinder/meta.yml
+++ b/modules/local/scdblfinder/meta.yml
@@ -1,4 +1,3 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
 name: "scdblfinder"
 description: Detect doublets in single-cell RNA-seq data using scDblFinder
 keywords:

From 275fac8cb2ef149439ab4f08f04ce153213a6a2e Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 14:46:08 +0300
Subject: [PATCH 09/28] moved scdblfinder module to doublet detection
 directory

---
 .../local/{ => doublet_detection}/scdblfinder/environment.yml   | 0
 modules/local/{ => doublet_detection}/scdblfinder/main.nf       | 0
 modules/local/{ => doublet_detection}/scdblfinder/meta.yml      | 0
 .../{ => doublet_detection}/scdblfinder/templates/scdblfinder.R | 0
 .../{ => doublet_detection}/scdblfinder/tests/main.nf.test      | 2 +-
 .../{ => doublet_detection}/scdblfinder/tests/main.nf.test.snap | 0
 subworkflows/local/doublet_detection/main.nf                    | 2 +-
 7 files changed, 2 insertions(+), 2 deletions(-)
 rename modules/local/{ => doublet_detection}/scdblfinder/environment.yml (100%)
 rename modules/local/{ => doublet_detection}/scdblfinder/main.nf (100%)
 rename modules/local/{ => doublet_detection}/scdblfinder/meta.yml (100%)
 rename modules/local/{ => doublet_detection}/scdblfinder/templates/scdblfinder.R (100%)
 rename modules/local/{ => doublet_detection}/scdblfinder/tests/main.nf.test (96%)
 rename modules/local/{ => doublet_detection}/scdblfinder/tests/main.nf.test.snap (100%)

diff --git a/modules/local/scdblfinder/environment.yml b/modules/local/doublet_detection/scdblfinder/environment.yml
similarity index 100%
rename from modules/local/scdblfinder/environment.yml
rename to modules/local/doublet_detection/scdblfinder/environment.yml
diff --git a/modules/local/scdblfinder/main.nf b/modules/local/doublet_detection/scdblfinder/main.nf
similarity index 100%
rename from modules/local/scdblfinder/main.nf
rename to modules/local/doublet_detection/scdblfinder/main.nf
diff --git a/modules/local/scdblfinder/meta.yml b/modules/local/doublet_detection/scdblfinder/meta.yml
similarity index 100%
rename from modules/local/scdblfinder/meta.yml
rename to modules/local/doublet_detection/scdblfinder/meta.yml
diff --git a/modules/local/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
similarity index 100%
rename from modules/local/scdblfinder/templates/scdblfinder.R
rename to modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
diff --git a/modules/local/scdblfinder/tests/main.nf.test b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
similarity index 96%
rename from modules/local/scdblfinder/tests/main.nf.test
rename to modules/local/doublet_detection/scdblfinder/tests/main.nf.test
index 1ed6f7ec..0ed11140 100644
--- a/modules/local/scdblfinder/tests/main.nf.test
+++ b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
@@ -1,7 +1,7 @@
 nextflow_process {
 
     name "Test Process SCDBLFINDER"
-    script "modules/local/scdblfinder/main.nf"
+    script "modules/local/doublet_detection/scdblfinder/main.nf"
     process "SCDBLFINDER"
 
     tag "modules"
diff --git a/modules/local/scdblfinder/tests/main.nf.test.snap b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
similarity index 100%
rename from modules/local/scdblfinder/tests/main.nf.test.snap
rename to modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
diff --git a/subworkflows/local/doublet_detection/main.nf b/subworkflows/local/doublet_detection/main.nf
index 110a3c4a..cc195cfc 100644
--- a/subworkflows/local/doublet_detection/main.nf
+++ b/subworkflows/local/doublet_detection/main.nf
@@ -2,7 +2,7 @@ include { SCVITOOLS_SOLO   } from '../../../modules/nf-core/scvitools/solo'
 include { SCANPY_SCRUBLET  } from '../../../modules/nf-core/scanpy/scrublet'
 include { DOUBLETDETECTION } from '../../../modules/nf-core/doubletdetection'
 include { SCDS             } from '../../../modules/local/doublet_detection/scds'
-include { SCDBLFINDER      } from '../../../modules/local/scdblfinder'
+include { SCDBLFINDER      } from '../../../modules/local/doublet_detection/scdblfinder'
 include { DOUBLET_REMOVAL  } from '../../../modules/local/doublet_detection/doublet_removal'
 
 workflow DOUBLET_DETECTION {

From 993a8f2f5f88a0d92482494c587a9def5680fa80 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Thu, 12 Mar 2026 15:33:52 +0300
Subject: [PATCH 10/28] updated docs output to include scdblfinder

---
 docs/output.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/output.md b/docs/output.md
index 04ffcff4..e08397b8 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -25,6 +25,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
       - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)
       - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)
       - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)
+      - [scDblFinder](https://bioconductor.org/packages/release/bioc/html/scDblFinder.html)
 2. Sample aggregation
    1. Merge into a single h5ad file
    2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))

From a56e65693974fa9d005ba7e5d2900856ebca6e1f Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Thu, 12 Mar 2026 13:02:17 +0000
Subject: [PATCH 11/28] [automated] Fix code linting

---
 CITATIONS.md                                                | 2 +-
 modules/local/doublet_detection/scdblfinder/environment.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CITATIONS.md b/CITATIONS.md
index 5ce4742e..10ad4fc5 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -50,7 +50,7 @@
 - [scDblFinder](https://pubmed.ncbi.nlm.nih.gov/35118618/)
 
   > Germain P, Lun A, Garcia Meixide C, Macnair W, Robinson M. Doublet identification in single-cell sequencing data using scDblFinder. F1000Res. 2022;11:979. doi: 10.12688/f1000research.73600.2.
-  
+
 ## Software packaging/containerisation tools
 
 - [Anaconda](https://anaconda.com)
diff --git a/modules/local/doublet_detection/scdblfinder/environment.yml b/modules/local/doublet_detection/scdblfinder/environment.yml
index ea37ba79..b3c8a625 100644
--- a/modules/local/doublet_detection/scdblfinder/environment.yml
+++ b/modules/local/doublet_detection/scdblfinder/environment.yml
@@ -8,4 +8,4 @@ dependencies:
   - bioconda::bioconductor-biocparallel=1.44.0
   - bioconda::bioconductor-anndatar=1.0.2
   - bioconda::bioconductor-rhdf5=2.54.1
-  - conda-forge::r-tidyverse=2.0.0
\ No newline at end of file
+  - conda-forge::r-tidyverse=2.0.0

From ec56f35da159f673149357714169f760bdb84224 Mon Sep 17 00:00:00 2001
From: Kurayi Chawatama <142725139+KurayiChawatama@users.noreply.github.com>
Date: Thu, 12 Mar 2026 16:09:58 +0300
Subject: [PATCH 12/28] Update
 modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .../doublet_detection/scdblfinder/templates/scdblfinder.R   | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index 48e3b1a2..d7ca894a 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -10,7 +10,11 @@ adata <- read_h5ad("${h5ad}")
 sce <- adata\$as_SingleCellExperiment()
 
 # Set the param to a specified RNG seed for reproducibility
-bp <- MulticoreParam(workers = multicoreWorkers(), RNGseed=123)
+nxf_task_cpus <- as.integer(Sys.getenv("NXF_TASK_CPUS", unset = "1"))
+if (is.na(nxf_task_cpus) || nxf_task_cpus < 1L) {
+    nxf_task_cpus <- 1L
+}
+bp <- MulticoreParam(workers = nxf_task_cpus, RNGseed=123)
 
 
 # 10 Genomics Doublet Rate calculator used to get multiplet rate if not provided

From 0b0b19d70625b50dfd2211002f72469c45e5f06b Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 11:17:42 +0300
Subject: [PATCH 13/28] added https version of the singularity container link

---
 modules/local/doublet_detection/scdblfinder/main.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/doublet_detection/scdblfinder/main.nf b/modules/local/doublet_detection/scdblfinder/main.nf
index 415593ed..8c9e691a 100644
--- a/modules/local/doublet_detection/scdblfinder/main.nf
+++ b/modules/local/doublet_detection/scdblfinder/main.nf
@@ -4,7 +4,7 @@ process SCDBLFINDER {
 
     conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'oras://community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-biocparallel_bioconductor-rhdf5_bioconductor-scdblfinder_pruned:28444625ead66428' :
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/99/993a012a69d920412b090701eb733ccf35c8655c3d012756ca6b0af1cfcd4780/data' :
         'community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-biocparallel_bioconductor-rhdf5_bioconductor-scdblfinder_pruned:0f9db6b0855861de' }"
 
     input:

From 6d741bc065e653548086844c0a5256c109f9c846 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 11:32:19 +0300
Subject: [PATCH 14/28] refactor(scDblFinder): optimize multiplet rate
 calculation using findInterval

---
 .../scdblfinder/templates/scdblfinder.R                  | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index d7ca894a..b3ade99d 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -29,12 +29,11 @@ multiplet_rates_10x <- data.frame(
 )
 
 # Adjust to use the number of cells in the SCE object
-multiplet_rate <- multiplet_rates_10x %>%
-  dplyr::filter(Recovered_cells < ncol(sce)) %>%
-  dplyr::slice(which.max(Recovered_cells)) %>%
-  dplyr::pull(Multiplet_rate) %>%
-  as.numeric()
+idx <- findInterval(ncol(sce), multiplet_rates_10x\$Recovered_cells)
+if (idx < 1L) idx <- 1L
+if (idx > nrow(multiplet_rates_10x)) idx <- nrow(multiplet_rates_10x)
 
+multiplet_rate <- as.numeric(multiplet_rates_10x\$Multiplet_rate[idx])
 message(paste0("Setting multiplet rate to ", multiplet_rate, " for ", ncol(sce), " cells"))
 
 # Save original cell names and count before overwriting sce

From 947ffa2c293ae182da91b342b15f19f905f4a339 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 11:46:18 +0300
Subject: [PATCH 15/28] added explanation for column name change

---
 .../scdblfinder/templates/scdblfinder.R             | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index b3ade99d..f70f4637 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -10,12 +10,8 @@ adata <- read_h5ad("${h5ad}")
 sce <- adata\$as_SingleCellExperiment()
 
 # Set the param to a specified RNG seed for reproducibility
-nxf_task_cpus <- as.integer(Sys.getenv("NXF_TASK_CPUS", unset = "1"))
-if (is.na(nxf_task_cpus) || nxf_task_cpus < 1L) {
-    nxf_task_cpus <- 1L
-}
-bp <- MulticoreParam(workers = nxf_task_cpus, RNGseed=123)
-
+num_threads <- max(1L, as.integer("${task.cpus}"))
+bp <- MulticoreParam(workers = num_threads, RNGseed = 123)
 
 # 10 Genomics Doublet Rate calculator used to get multiplet rate if not provided
 # 10X multiplet rate table(https://rpubs.com/kenneditodd/doublet_finder_example)
@@ -50,7 +46,10 @@ sce <- scDblFinder(
     artificialDoublets = n_cells
 )
 
-# Restore original cell names
+# Restore the input barcodes because running scDblFinder on the just the assay matrix above can
+# return a new SCE whose column names no longer match the original AnnData cell IDs.
+# Keeping the original names is required so the output h5ad obs_names and CSV rows
+# still map back to the same cells seen by downstream steps.
 if (!is.null(original_cell_names) && length(original_cell_names) == ncol(sce)) {
     colnames(sce) <- original_cell_names
 }

From 8c2e3827296073d02c6ae1ecfc236d8efb026b17 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 11:52:25 +0300
Subject: [PATCH 16/28] write updated SingleCellExperiment directly as h5ad
 without explicit conversion

---
 .../doublet_detection/scdblfinder/templates/scdblfinder.R    | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index f70f4637..a11a0b2c 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -73,9 +73,8 @@ for (col in scdbl_cols) {
   colData(sce)[[col]] <- NULL
 }
 
-# Convert back to AnnData and save
-adata_processed <- as_AnnData(sce)
-write_h5ad(adata_processed, "${prefix}.h5ad")
+# Write the updated SingleCellExperiment directly as h5ad
+write_h5ad(sce, "${prefix}.h5ad")
 
 # Extract predictions for doublet removal step
 # Create a binary doublet call based on class

From 212b3e2bc2b675406bb12628dac6a9a9b6879523 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 12:16:58 +0300
Subject: [PATCH 17/28] enhance h5ad writing with validation for cell barcodes
 and primary assay

---
 .../scdblfinder/templates/scdblfinder.R       | 20 +++++++++++++------
 .../scdblfinder/tests/main.nf.test            |  9 ++++-----
 .../scdblfinder/tests/main.nf.test.snap       |  6 ++----
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index a11a0b2c..6bc3540e 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -73,15 +73,23 @@ for (col in scdbl_cols) {
   colData(sce)[[col]] <- NULL
 }
 
-# Write the updated SingleCellExperiment directly as h5ad
-write_h5ad(sce, "${prefix}.h5ad")
+# The doublet calls must stay keyed by the original cell barcodes. If they are not
+# present here, something went wrong during conversion or scDblFinder processing and
+# we should fail instead of inventing replacement identifiers.
+if (is.null(colnames(sce)) || length(colnames(sce)) != ncol(sce)) {
+  stop("scDblFinder output is missing valid cell barcodes; cannot write aligned h5ad and prediction outputs.")
+}
+
+# Write the updated SingleCellExperiment directly as h5ad, explicitly mapping the
+# primary assay to AnnData X so downstream readers see a valid matrix field.
+primary_assay <- assayNames(sce)[1]
+if (is.na(primary_assay) || primary_assay == "") {
+  stop("scDblFinder output is missing a primary assay; cannot write h5ad output.")
+}
+write_h5ad(sce, "${prefix}.h5ad", x_mapping = primary_assay)
 
 # Extract predictions for doublet removal step
 # Create a binary doublet call based on class
-# Ensure we have valid row names
-if (is.null(colnames(sce)) || length(colnames(sce)) != ncol(sce)) {
-    colnames(sce) <- paste0("cell_", seq_len(ncol(sce)))
-}
 
 # Create predictions vector
 doublet_calls <- colData(sce)\$scdblfinder_class == "doublet"
diff --git a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
index 0ed11140..8172a071 100644
--- a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
+++ b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
@@ -29,12 +29,11 @@ nextflow_process {
             { assert process.success },
             { assert snapshot(
                 process.out.versions,
-                process.out.predictions,
-                // Hashing does not work due to this issue:
-                // https://github.com/scverse/anndataR/issues/272
-                file(process.out.h5ad.get(0).get(1)).exists(),
-                file(process.out.h5ad.get(0).get(1)).size()
+                process.out.predictions
             ).match() }
+            ,
+            { assert file(process.out.h5ad.get(0).get(1)).exists() },
+            { assert file(process.out.h5ad.get(0).get(1)).size() > 0 }
             )
         }
 
diff --git a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
index 721b5284..ba49cf65 100644
--- a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
+++ b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
@@ -109,11 +109,9 @@
                     },
                     "test_scdblfinder.csv:md5,130130ae215768e16e0df93a064dc5e9"
                 ]
-            ],
-            true,
-            5101352
+            ]
         ],
-        "timestamp": "2026-03-12T13:08:16.847676966",
+        "timestamp": "2026-03-13T12:03:27.014939887",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.4"

From 9774828800af69041963b7a41d0f705d38b20aa6 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 12:18:37 +0300
Subject: [PATCH 18/28] add scdblfinder to input methods in doublet detection
 subworkflow test

---
 subworkflows/local/doublet_detection/tests/main.nf.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/doublet_detection/tests/main.nf.test b/subworkflows/local/doublet_detection/tests/main.nf.test
index 26fb4f2b..b8f0ef18 100644
--- a/subworkflows/local/doublet_detection/tests/main.nf.test
+++ b/subworkflows/local/doublet_detection/tests/main.nf.test
@@ -50,7 +50,7 @@ nextflow_workflow {
                         file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
                     ]
                 )
-                input[1] = ['scds', 'solo', 'scrublet']
+                input[1] = ['scds', 'solo', 'scrublet', 'scdblfinder']
                 input[2] = 2
                 input[3] = 1
                 """

From e32e393094310d743ba53dbdd6cfadb76ad69740 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 12:27:18 +0300
Subject: [PATCH 19/28] streamline renaming of scDblFinder columns with less
 clumsy code

---
 .../scdblfinder/templates/scdblfinder.R       | 22 +++++++------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index 6bc3540e..3e2dfd84 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -58,20 +58,14 @@ if (!is.null(original_cell_names) && length(original_cell_names) == ncol(sce)) {
 message("scDblFinder results summary:")
 print(table(sce\$scDblFinder.class))
 
-# Rename scDblFinder.* columns for consistency with other doublet methods
-scdbl_cols <- grep("^scDblFinder\\\\.", colnames(colData(sce)), value = TRUE)
-
-# First remove "scDblFinder." prefix, THEN replace remaining dots with underscores
-new_scdbl_cols <- paste0("scdblfinder_", gsub("\\\\.", "_", gsub("^scDblFinder\\\\.", "", scdbl_cols)))
-
-# Rename columns in colData(sce) - create new columns first, then delete old ones
-for (i in seq_along(scdbl_cols)) {
-  colData(sce)[[new_scdbl_cols[i]]] <- colData(sce)[[scdbl_cols[i]]]
-}
-# Now delete old columns
-for (col in scdbl_cols) {
-  colData(sce)[[col]] <- NULL
-}
+# Rename scDblFinder.* columns for consistency with other doublet methods.
+# Replace prefix first, then replace any remaining dots with underscores.
+idx <- grep("^scDblFinder\\\\.", colnames(colData(sce)))
+colnames(colData(sce))[idx] <- gsub(
+  "\\\\.",
+  "_",
+  sub("^scDblFinder\\\\.", "scdblfinder_", colnames(colData(sce))[idx])
+)
 
 # The doublet calls must stay keyed by the original cell barcodes. If they are not
 # present here, something went wrong during conversion or scDblFinder processing and

From addb64126710bd2d422d8268030a03c6746e860e Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 12:36:19 +0300
Subject: [PATCH 20/28] removed explicit artificialDoublets argument from the
 scDblFinder call

---
 .../doublet_detection/scdblfinder/templates/scdblfinder.R      | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index 3e2dfd84..0e4dc658 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -42,8 +42,7 @@ set.seed(123)
 sce <- scDblFinder(
     assays(sce)[[1]],
     BPPARAM = bp,
-    dbr = multiplet_rate,
-    artificialDoublets = n_cells
+    dbr = multiplet_rate
 )
 
 # Restore the input barcodes because running scDblFinder on the just the assay matrix above can

From 1ad461fb275c2608fa00af1f821880e29fc5c9d8 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 12:36:48 +0300
Subject: [PATCH 21/28] updated test snapshot to match previous commit

---
 .../doublet_detection/scdblfinder/tests/main.nf.test.snap     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
index ba49cf65..2c1f2b89 100644
--- a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
+++ b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
@@ -107,11 +107,11 @@
                     {
                         "id": "test"
                     },
-                    "test_scdblfinder.csv:md5,130130ae215768e16e0df93a064dc5e9"
+                    "test_scdblfinder.csv:md5,26628dd50c32c06df8fd1ffb973c9e3d"
                 ]
             ]
         ],
-        "timestamp": "2026-03-13T12:03:27.014939887",
+        "timestamp": "2026-03-13T12:35:33.96981645",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.4"

From 63c0307fc41bc9f01a80246ccd4eba123e102a71 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 13:15:28 +0300
Subject: [PATCH 22/28] Enhance scDblFinder functionality and documentation

- Added optional `doublet_rate` column in input samplesheet for per-sample expected doublet rate in `scDblFinder`.
- Updated `scDblFinder` to utilize internal `dbr` estimation when `doublet_rate` is not provided.
- Modified input and output handling in `SCDBLFINDER` process to accommodate new `doublet_rate` parameter.
- Updated relevant documentation including CHANGELOG, README, and usage examples to reflect changes.
- Added tests to validate functionality with provided `doublet_rate`.
---
 CHANGELOG.md                                  |  3 +
 README.md                                     |  2 +
 assets/schema_input.json                      |  7 +++
 docs/usage.md                                 |  9 +--
 .../doublet_detection/scdblfinder/main.nf     |  2 +-
 .../doublet_detection/scdblfinder/meta.yml    |  5 ++
 .../scdblfinder/templates/scdblfinder.R       | 40 +++++-------
 .../scdblfinder/tests/main.nf.test            | 38 ++++++++++-
 .../scdblfinder/tests/main.nf.test.snap       | 63 +++++--------------
 ro-crate-metadata.json                        |  2 +-
 subworkflows/local/doublet_detection/main.nf  |  3 +-
 11 files changed, 96 insertions(+), 78 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 888d9ccd..a5b22f70 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,9 +11,12 @@ Initial release of nf-core/scdownstream, created with the [nf-core](https://nf-c
 
 - Added `singleR` module for automated cell type annotation.
 - Added `scDblFinder` module for doublet detection.
+- Added optional `doublet_rate` column in input samplesheet to provide per-sample expected doublet rate for `scDblFinder`.
 
 ### `Fixed`
 
+- Updated `scDblFinder` to use internal `dbr` estimation when `doublet_rate` is not provided, and to use provided `doublet_rate` when available.
+
 ### `Dependencies`
 
 ### `Deprecated`
diff --git a/README.md b/README.md
index 6edbd11e..5d987582 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,8 @@ sample4,/absolute/path/to/sample3.csv
 Each entry represents a h5ad, h5, RDS or CSV file. RDS files may contain any object that can be converted to a SingleCellExperiment using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function.
 CSV files should contain a matrix with genes as columns and cells as rows. The first column should contain cell names/barcodes.
 
+For `scDblFinder`, you can optionally add a `doublet_rate` column (values between `0` and `1`) to the samplesheet. If omitted, `scDblFinder` estimates the doublet rate internally.
+
 -->
 
 Now, you can run the pipeline using:
diff --git a/assets/schema_input.json b/assets/schema_input.json
index dedad5d7..d2636e85 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -122,6 +122,13 @@
                 "errorMessage": "Number of cells expected from the experimental design, used as input to cellbender.",
                 "meta": ["expected_cells"]
             },
+            "doublet_rate": {
+                "type": "number",
+                "minimum": 0,
+                "maximum": 1,
+                "errorMessage": "doublet_rate must be a number between 0 and 1.",
+                "meta": ["doublet_rate"]
+            },
             "ambient_correction": {
                 "type": "boolean",
                 "default": true,
diff --git a/docs/usage.md b/docs/usage.md
index f635963e..8273218e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -38,10 +38,10 @@ sample3,/absolute/path/to/sample3.csv
 There are a couple of optional columns that can be used for more advanced features:
 
 ```csv title="samplesheet.csv"
-sample,filtered,unfiltered,batch_col,label_col,condition_col,unknown_label,min_genes,min_cells,min_counts_cell,min_counts_gene,expected_cells,ambient_correction,ambient_corrected_integration
-sample1,/absolute/path/to/sample1_filtered.h5ad,/absolute/path/to/sample1.h5ad,batch,cell_type,condition,unknown,1,2,3,4,5000,true,false
-sample2,relative/path/to/sample2_filtered.rds,relative/path/to/sample2.rds,batch_id,annotation,condition,unannotated,5,6,7,8,3000,false,
-sample3,/absolute/path/to/sample3_filtered.csv,/absolute/path/to/sample3.csv,,,,,9,10,11,12,,true,true
+sample,filtered,unfiltered,batch_col,label_col,condition_col,unknown_label,min_genes,min_cells,min_counts_cell,min_counts_gene,expected_cells,doublet_rate,ambient_correction,ambient_corrected_integration
+sample1,/absolute/path/to/sample1_filtered.h5ad,/absolute/path/to/sample1.h5ad,batch,cell_type,condition,unknown,1,2,3,4,5000,0.08,true,false
+sample2,relative/path/to/sample2_filtered.rds,relative/path/to/sample2.rds,batch_id,annotation,condition,unannotated,5,6,7,8,3000,,false,
+sample3,/absolute/path/to/sample3_filtered.csv,/absolute/path/to/sample3.csv,,,,,9,10,11,12,,,true,true
 ```
 
 For CSV input files, specifying the `batch_col`, `label_col`, `condition_col`, and `unknown_label` columns will not have any effect, as no additional metadata is available in the CSV file.
@@ -63,6 +63,7 @@ For CSV input files, specifying the `batch_col`, `label_col`, `condition_col`, a
 | `min_counts_cell`               | Minimum number of counts required for a cell to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                     |
 | `min_counts_gene`               | Minimum number of counts required for a gene to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                     |
 | `expected_cells`                | Number of expected cells, used as input to CellBender for empty droplet detection.                                                                                                                                                                                                                                                                                                                                  |
+| `doublet_rate`                  | Optional expected doublet rate (0-1) for `scDblFinder`. If not provided, `scDblFinder` estimates it internally.                                                                                                                                                                                                                                                                                                    |
 | `max_mito_percentage`           | Maximum percentage of mitochondrial reads for a cell to be considered. Defaults to `100`.                                                                                                                                                                                                                                                                                                                           |
 | `ambient_correction`            | Whether to perform ambient RNA correction for this sample. Set to `true` to use the globally configured method, `false` to skip ambient correction for this sample. Defaults to `true`.                                                                                                                                                                                                                             |
 | `ambient_corrected_integration` | Whether to use ambient-corrected counts for integration for this sample. Set to `true` to use corrected counts in downstream integration, `false` to store them only as additional layers. Can override the global `--ambient_corrected_integration` parameter. Defaults to global setting.                                                                                                                         |
diff --git a/modules/local/doublet_detection/scdblfinder/main.nf b/modules/local/doublet_detection/scdblfinder/main.nf
index 8c9e691a..2f643d15 100644
--- a/modules/local/doublet_detection/scdblfinder/main.nf
+++ b/modules/local/doublet_detection/scdblfinder/main.nf
@@ -8,7 +8,7 @@ process SCDBLFINDER {
         'community.wave.seqera.io/library/bioconductor-anndatar_bioconductor-biocparallel_bioconductor-rhdf5_bioconductor-scdblfinder_pruned:0f9db6b0855861de' }"
 
     input:
-    tuple val(meta), path(h5ad)
+    tuple val(meta), path(h5ad), val(dbr)
 
     output:
     tuple val(meta), path("${prefix}.h5ad"), emit: h5ad
diff --git a/modules/local/doublet_detection/scdblfinder/meta.yml b/modules/local/doublet_detection/scdblfinder/meta.yml
index 33810388..566d28ec 100644
--- a/modules/local/doublet_detection/scdblfinder/meta.yml
+++ b/modules/local/doublet_detection/scdblfinder/meta.yml
@@ -27,6 +27,11 @@ input:
         pattern: "*.{h5ad}"
         ontologies:
           - edam: "http://edamontology.org/format_3590" # HDF5 format
+    - dbr:
+        type: number
+        description: |
+          Optional expected doublet rate (0-1). If null, scDblFinder estimates
+          the doublet rate internally.
 
 output:
   h5ad:
diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index 0e4dc658..21d526dd 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -13,37 +13,31 @@ sce <- adata\$as_SingleCellExperiment()
 num_threads <- max(1L, as.integer("${task.cpus}"))
 bp <- MulticoreParam(workers = num_threads, RNGseed = 123)
 
-# 10 Genomics Doublet Rate calculator used to get multiplet rate if not provided
-# 10X multiplet rate table(https://rpubs.com/kenneditodd/doublet_finder_example)
-multiplet_rates_10x <- data.frame(
-  "Multiplet_rate" = c(0.004, 0.008, 0.0160, 0.023, 0.031,
-                        0.039, 0.046, 0.054, 0.061, 0.069, 0.076),
-  "Loaded_cells" = c(800, 1600, 3200, 4800, 6400, 8000, 9600,
-                     11200, 12800, 14400, 16000),
-  "Recovered_cells" = c(500, 1000, 2000, 3000, 4000, 5000, 6000,
-                        7000, 8000, 9000, 10000)
-)
-
-# Adjust to use the number of cells in the SCE object
-idx <- findInterval(ncol(sce), multiplet_rates_10x\$Recovered_cells)
-if (idx < 1L) idx <- 1L
-if (idx > nrow(multiplet_rates_10x)) idx <- nrow(multiplet_rates_10x)
-
-multiplet_rate <- as.numeric(multiplet_rates_10x\$Multiplet_rate[idx])
-message(paste0("Setting multiplet rate to ", multiplet_rate, " for ", ncol(sce), " cells"))
-
 # Save original cell names and count before overwriting sce
 original_cell_names <- colnames(sce)
-n_cells <- ncol(sce)
+
+# Parse per-sample doublet rate from Nextflow input. If unavailable, let
+# scDblFinder estimate dbr internally (recommended default for 10X data).
+dbr_raw <- trimws("${dbr}")
+dbr <- suppressWarnings(as.numeric(dbr_raw))
 
 # Run scDblFinder on the counts matrix (first assay)
 # scDblFinder creates artificial doublets internally and returns a new SCE
 set.seed(123)
-sce <- scDblFinder(
+if (!is.na(dbr)) {
+  message(paste0("Using provided doublet_rate (dbr): ", dbr))
+  sce <- scDblFinder(
     assays(sce)[[1]],
     BPPARAM = bp,
-    dbr = multiplet_rate
-)
+    dbr = dbr
+  )
+} else {
+  message("No valid doublet_rate provided; using scDblFinder internal dbr estimation")
+  sce <- scDblFinder(
+    assays(sce)[[1]],
+    BPPARAM = bp
+  )
+}
 
 # Restore the input barcodes because running scDblFinder on the just the assay matrix above can
 # return a new SCE whose column names no longer match the original AnnData cell IDs.
diff --git a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
index 8172a071..b0831c94 100644
--- a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
+++ b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test
@@ -17,7 +17,8 @@ nextflow_process {
                 """
                 input[0] = channel.of([
                         [ id: 'test' ],
-                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true),
+                        null
                     ]
                 )
                 """
@@ -39,6 +40,38 @@ nextflow_process {
 
     }
 
+    test("Should run with provided doublet_rate") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = channel.of([
+                        [ id: 'test' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true),
+                        0.08
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+            { assert process.success },
+            { assert snapshot(
+                process.out.versions,
+                process.out.predictions
+            ).match() },
+            { assert file(process.out.h5ad.get(0).get(1)).exists() },
+            { assert file(process.out.h5ad.get(0).get(1)).size() > 0 }
+            )
+        }
+
+    }
+
     test("Should run without failures - stub") {
 
         options '-stub'
@@ -51,7 +84,8 @@ nextflow_process {
                 """
                 input[0] = channel.of([
                         [ id: 'test' ],
-                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true)
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/scrnaseq/h5ad/SRR28679759_filtered_matrix.h5ad', checkIfExists: true),
+                        null
                     ]
                 )
                 """
diff --git a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
index 2c1f2b89..dda2e33e 100644
--- a/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
+++ b/modules/local/doublet_detection/scdblfinder/tests/main.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "homo_sapiens - h5ad - stub": {
+    "Should run without failures - stub": {
         "content": [
             {
                 "0": [
@@ -42,62 +42,33 @@
                 ]
             }
         ],
-        "timestamp": "2026-03-12T11:44:51.894263326",
+        "timestamp": "2026-03-12T12:34:23.397301125",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.4"
         }
     },
-    "Should run without failures - stub": {
+    "Should run without failures": {
         "content": [
-            {
-                "0": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_scdblfinder.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "1": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_scdblfinder.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "2": [
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
-                ],
-                "h5ad": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_scdblfinder.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "predictions": [
-                    [
-                        {
-                            "id": "test"
-                        },
-                        "test_scdblfinder.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
-                    ]
-                ],
-                "versions": [
-                    "versions.yml:md5,d41d8cd98f00b204e9800998ecf8427e"
+            [
+                "versions.yml:md5,ce056c78586769ad5433f5fbb86f70c7"
+            ],
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_scdblfinder.csv:md5,e92cd0219440b0caab1afb4b5b7f3e60"
                 ]
-            }
+            ]
         ],
-        "timestamp": "2026-03-12T12:34:23.397301125",
+        "timestamp": "2026-03-13T12:53:40.597040086",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.4"
         }
     },
-    "Should run without failures": {
+    "Should run with provided doublet_rate": {
         "content": [
             [
                 "versions.yml:md5,ce056c78586769ad5433f5fbb86f70c7"
@@ -107,11 +78,11 @@
                     {
                         "id": "test"
                     },
-                    "test_scdblfinder.csv:md5,26628dd50c32c06df8fd1ffb973c9e3d"
+                    "test_scdblfinder.csv:md5,ad5d0bf6045f81b6a04980d1f522420e"
                 ]
             ]
         ],
-        "timestamp": "2026-03-13T12:35:33.96981645",
+        "timestamp": "2026-03-13T13:02:31.168458271",
         "meta": {
             "nf-test": "0.9.4",
             "nextflow": "25.10.4"
diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 07662401..d1e47b0c 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "InProgress",
             "datePublished": "2025-11-20T09:32:29+00:00",
-            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-scdownstream_logo_dark.png\">\n    <img alt=\"nf-core/scdownstream\" src=\"docs/images/nf-core-scdownstream_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/scdownstream)\n[![GitHub Actions CI Status](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scdownstream/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scdownstream)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scdownstream-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scdownstream)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scdownstream** is a bioinformatics pipeline that can be used to process already quantified single-cell RNA-seq data. It takes a samplesheet and h5ad-, SingleCellExperiment/Seurat- or CSV files as input and performs quality control, integration, dimensionality reduction and clustering. It produces an integrated h5ad and SingleCellExperiment file and an extensive QC report.\n\nThe pipeline is based on the learnings and implementations from the following pipelines (alphabetical):\n\n- [panpipes](https://github.com/DendrouLab/panpipes)\n- [scFlow](https://combiz.github.io/scFlow/)\n- [scRAFIKI](https://github.com/Mye-InfoBank/scRAFIKI)\n- [YASCP](https://github.com/wtsi-hgi/yascp)\n\n# ![nf-core/scdownstream](docs/images/metromap.png)\n\nSteps marked with the boat icon are not yet implemented. For the other steps, the pipeline uses the following tools:\n\n1. Per-sample preprocessing\n   1. Convert all RDS files to h5ad format\n   2. Create filtered matrix (if not provided)\n   3. 
Present QC for raw counts ([`MultiQC`](http://multiqc.info/))\n   4. Remove ambient RNA\n      - [decontX](https://bioconductor.org/packages/release/bioc/html/decontX.html)\n      - [soupX](https://cran.r-project.org/web/packages/SoupX/readme/README.html)\n      - [cellbender](https://cellbender.readthedocs.io/en/latest/)\n      - [scAR](https://docs.scvi-tools.org/en/stable/user_guide/models/scar.html)\n   5. Apply user-defined QC filters (can be defined per sample in the samplesheet)\n   6. Doublet detection (Majority vote possible)\n      - [SOLO](https://docs.scvi-tools.org/en/stable/user_guide/models/solo.html)\n      - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)\n      - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)\n      - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)\n      - [scDblFinder](https://bioconductor.org/packages/release/bioc/html/scDblFinder.html)\n2. Sample aggregation\n   1. Merge into a single h5ad file\n   2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))\n   3. Integration\n      - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)\n      - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)\n      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)\n      - [BBKNN](https://github.com/Teichlab/bbknn)\n      - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)\n      - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)\n3. Cell type annotation\n   - [celltypist](https://www.celltypist.org/)\n4. Clustering and dimensionality reduction\n   1. [Leiden clustering](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html)\n   2. [UMAP](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html)\n5. 
Create report ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n> [!NOTE]\n> If you are confused by the terms `filtered` and `unfiltered`, please check out the respective [documentation](https://nf-co.re/scdownstream/dev/docs/usage/#filtered-and-unfiltered-matrices).\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,unfiltered\nsample1,/absolute/path/to/sample1.h5ad\nsample2,/absolute/path/to/sample3.h5\nsample3,relative/path/to/sample2.rds\nsample4,/absolute/path/to/sample3.csv\n```\n\nEach entry represents a h5ad, h5, RDS or CSV file. RDS files may contain any object that can be converted to a SingleCellExperiment using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function.\nCSV files should contain a matrix with genes as columns and cells as rows. The first column should contain cell names/barcodes.\n\n-->\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/scdownstream \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scdownstream/usage) and the [parameter documentation](https://nf-co.re/scdownstream/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scdownstream/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/scdownstream/output).\n\n## Credits\n\nnf-core/scdownstream was originally written by [Nico Trummer](https://github.com/nictru).\n\nWe thank the following people for their extensive assistance in the development of this pipeline (alphabetical):\n\n- [Fabian Rost](https://github.com/fbnrst)\n- [Fabiola Curion](https://github.com/bio-la)\n- [Gregor Sturm](https://github.com/grst)\n- [Jonathan Talbot-Martin](https://github.com/jtalbotmartin)\n- [Lukas Heumos](https://github.com/zethson)\n- [Matiss Ozols](https://github.com/maxozo)\n- [Nathan Skene](https://github.com/NathanSkene)\n- [Nurun Fancy](https://github.com/nfancy)\n- [Riley Grindle](https://github.com/Riley-Grindle)\n- [Ryan Seaman](https://github.com/RPSeaman)\n- [Steffen M\u00f6ller](https://github.com/smoe)\n- [Wojtek Sowinski](https://github.com/WojtekSowinski)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scdownstream` channel](https://nfcore.slack.com/channels/scdownstream) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n<!-- TODO nf-core: Add 
citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->\n<!-- If you use nf-core/scdownstream for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->\n\n<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-scdownstream_logo_dark.png\">\n    <img alt=\"nf-core/scdownstream\" src=\"docs/images/nf-core-scdownstream_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/scdownstream)\n[![GitHub Actions CI Status](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/scdownstream/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scdownstream/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/scdownstream)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scdownstream-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scdownstream)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/scdownstream** is a bioinformatics pipeline that can be used to process already quantified single-cell RNA-seq data. It takes a samplesheet and h5ad-, SingleCellExperiment/Seurat- or CSV files as input and performs quality control, integration, dimensionality reduction and clustering. It produces an integrated h5ad and SingleCellExperiment file and an extensive QC report.\n\nThe pipeline is based on the learnings and implementations from the following pipelines (alphabetical):\n\n- [panpipes](https://github.com/DendrouLab/panpipes)\n- [scFlow](https://combiz.github.io/scFlow/)\n- [scRAFIKI](https://github.com/Mye-InfoBank/scRAFIKI)\n- [YASCP](https://github.com/wtsi-hgi/yascp)\n\n# ![nf-core/scdownstream](docs/images/metromap.png)\n\nSteps marked with the boat icon are not yet implemented. For the other steps, the pipeline uses the following tools:\n\n1. Per-sample preprocessing\n   1. Convert all RDS files to h5ad format\n   2. Create filtered matrix (if not provided)\n   3. 
Present QC for raw counts ([`MultiQC`](http://multiqc.info/))\n   4. Remove ambient RNA\n      - [decontX](https://bioconductor.org/packages/release/bioc/html/decontX.html)\n      - [soupX](https://cran.r-project.org/web/packages/SoupX/readme/README.html)\n      - [cellbender](https://cellbender.readthedocs.io/en/latest/)\n      - [scAR](https://docs.scvi-tools.org/en/stable/user_guide/models/scar.html)\n   5. Apply user-defined QC filters (can be defined per sample in the samplesheet)\n   6. Doublet detection (Majority vote possible)\n      - [SOLO](https://docs.scvi-tools.org/en/stable/user_guide/models/solo.html)\n      - [scrublet](https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html)\n      - [DoubletDetection](https://doubletdetection.readthedocs.io/en/v2.5.2/doubletdetection.doubletdetection.html)\n      - [SCDS](https://bioconductor.org/packages/devel/bioc/vignettes/scds/inst/doc/scds.html)\n      - [scDblFinder](https://bioconductor.org/packages/release/bioc/html/scDblFinder.html)\n2. Sample aggregation\n   1. Merge into a single h5ad file\n   2. Present QC for merged counts ([`MultiQC`](http://multiqc.info/))\n   3. Integration\n      - [scVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scvi.html)\n      - [scANVI](https://docs.scvi-tools.org/en/stable/user_guide/models/scanvi.html)\n      - [Harmony](https://portals.broadinstitute.org/harmony/articles/quickstart.html)\n      - [BBKNN](https://github.com/Teichlab/bbknn)\n      - [Combat](https://scanpy.readthedocs.io/en/latest/api/generated/scanpy.pp.combat.html)\n      - [Seurat](https://satijalab.org/seurat/articles/integration_introduction)\n3. Cell type annotation\n   - [celltypist](https://www.celltypist.org/)\n4. Clustering and dimensionality reduction\n   1. [Leiden clustering](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.leiden.html)\n   2. [UMAP](https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.umap.html)\n5. 
Create report ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n> [!NOTE]\n> If you are confused by the terms `filtered` and `unfiltered`, please check out the respective [documentation](https://nf-co.re/scdownstream/dev/docs/usage/#filtered-and-unfiltered-matrices).\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,unfiltered\nsample1,/absolute/path/to/sample1.h5ad\nsample2,/absolute/path/to/sample3.h5\nsample3,relative/path/to/sample2.rds\nsample4,/absolute/path/to/sample3.csv\n```\n\nEach entry represents a h5ad, h5, RDS or CSV file. RDS files may contain any object that can be converted to a SingleCellExperiment using the [Seurat `as.SingleCellExperiment`](https://satijalab.org/seurat/reference/as.singlecellexperiment) function.\nCSV files should contain a matrix with genes as columns and cells as rows. The first column should contain cell names/barcodes.\n\nFor `scDblFinder`, you can optionally add a `doublet_rate` column (values between `0` and `1`) to the samplesheet. If omitted, `scDblFinder` estimates the doublet rate internally.\n\n-->\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/scdownstream \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scdownstream/usage) and the [parameter documentation](https://nf-co.re/scdownstream/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scdownstream/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/scdownstream/output).\n\n## Credits\n\nnf-core/scdownstream was originally written by [Nico Trummer](https://github.com/nictru).\n\nWe thank the following people for their extensive assistance in the development of this pipeline (alphabetical):\n\n- [Fabian Rost](https://github.com/fbnrst)\n- [Fabiola Curion](https://github.com/bio-la)\n- [Gregor Sturm](https://github.com/grst)\n- [Jonathan Talbot-Martin](https://github.com/jtalbotmartin)\n- [Lukas Heumos](https://github.com/zethson)\n- [Matiss Ozols](https://github.com/maxozo)\n- [Nathan Skene](https://github.com/NathanSkene)\n- [Nurun Fancy](https://github.com/nfancy)\n- [Riley Grindle](https://github.com/Riley-Grindle)\n- [Ryan Seaman](https://github.com/RPSeaman)\n- [Steffen M\u00f6ller](https://github.com/smoe)\n- [Wojtek Sowinski](https://github.com/WojtekSowinski)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#scdownstream` channel](https://nfcore.slack.com/channels/scdownstream) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n<!-- TODO nf-core: Add 
citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->\n<!-- If you use nf-core/scdownstream for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->\n\n<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"
diff --git a/subworkflows/local/doublet_detection/main.nf b/subworkflows/local/doublet_detection/main.nf
index cc195cfc..e75056ad 100644
--- a/subworkflows/local/doublet_detection/main.nf
+++ b/subworkflows/local/doublet_detection/main.nf
@@ -21,6 +21,7 @@ workflow DOUBLET_DETECTION {
         log.info("DOUBLET_DETECTION: Not performed since no methods selected.")
     } else {
         ch_batch_col = ch_h5ad.map { meta, _h5ad -> meta.batch_col }
+        ch_h5ad_doublet_rate = ch_h5ad.map { meta, h5ad -> [meta, h5ad, meta.doublet_rate] }
 
         if (methods.contains('scds')) {
             SCDS (
@@ -59,7 +60,7 @@ workflow DOUBLET_DETECTION {
 
         if (methods.contains('scdblfinder')) {
             SCDBLFINDER (
-                ch_h5ad
+                ch_h5ad_doublet_rate
             )
             ch_predictions = ch_predictions.mix(SCDBLFINDER.out.predictions)
             ch_versions = ch_versions.mix(SCDBLFINDER.out.versions)

From ee63725dbf8b07c3be8d4eefec3ab0819f4d653a Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Fri, 13 Mar 2026 10:32:41 +0000
Subject: [PATCH 23/28] [automated] Fix code linting

---
 docs/usage.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/usage.md b/docs/usage.md
index 8273218e..d5dd5c52 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -63,7 +63,7 @@ For CSV input files, specifying the `batch_col`, `label_col`, `condition_col`, a
 | `min_counts_cell`               | Minimum number of counts required for a cell to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                     |
 | `min_counts_gene`               | Minimum number of counts required for a gene to be considered. Defaults to `1`.                                                                                                                                                                                                                                                                                                                                     |
 | `expected_cells`                | Number of expected cells, used as input to CellBender for empty droplet detection.                                                                                                                                                                                                                                                                                                                                  |
-| `doublet_rate`                  | Optional expected doublet rate (0-1) for `scDblFinder`. If not provided, `scDblFinder` estimates it internally.                                                                                                                                                                                                                                                                                                    |
+| `doublet_rate`                  | Optional expected doublet rate (0-1) for `scDblFinder`. If not provided, `scDblFinder` estimates it internally.                                                                                                                                                                                                                                                                                                     |
 | `max_mito_percentage`           | Maximum percentage of mitochondrial reads for a cell to be considered. Defaults to `100`.                                                                                                                                                                                                                                                                                                                           |
 | `ambient_correction`            | Whether to perform ambient RNA correction for this sample. Set to `true` to use the globally configured method, `false` to skip ambient correction for this sample. Defaults to `true`.                                                                                                                                                                                                                             |
 | `ambient_corrected_integration` | Whether to use ambient-corrected counts for integration for this sample. Set to `true` to use corrected counts in downstream integration, `false` to store them only as additional layers. Can override the global `--ambient_corrected_integration` parameter. Defaults to global setting.                                                                                                                         |

From 6480259e53f23c933e8bd8a14a86f707091632cf Mon Sep 17 00:00:00 2001
From: Kurayi Chawatama <142725139+KurayiChawatama@users.noreply.github.com>
Date: Fri, 13 Mar 2026 13:46:08 +0300
Subject: [PATCH 24/28] Accumulate ch_versions with mix in other doublet detection methods

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 subworkflows/local/doublet_detection/main.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/subworkflows/local/doublet_detection/main.nf b/subworkflows/local/doublet_detection/main.nf
index e75056ad..81e26962 100644
--- a/subworkflows/local/doublet_detection/main.nf
+++ b/subworkflows/local/doublet_detection/main.nf
@@ -28,7 +28,7 @@ workflow DOUBLET_DETECTION {
                 ch_h5ad
             )
             ch_predictions = ch_predictions.mix(SCDS.out.predictions)
-            ch_versions = SCDS.out.versions
+            ch_versions = ch_versions.mix(SCDS.out.versions)
         }
 
         if (methods.contains('solo')) {
@@ -38,7 +38,7 @@ workflow DOUBLET_DETECTION {
                 scvi_max_epochs ?: []
             )
             ch_predictions = ch_predictions.mix(SCVITOOLS_SOLO.out.predictions)
-            ch_versions = SCVITOOLS_SOLO.out.versions
+            ch_versions = ch_versions.mix(SCVITOOLS_SOLO.out.versions)
         }
 
         if (methods.contains('scrublet')) {
@@ -47,7 +47,7 @@ workflow DOUBLET_DETECTION {
                 ch_batch_col
             )
             ch_predictions = ch_predictions.mix(SCANPY_SCRUBLET.out.predictions)
-            ch_versions = SCANPY_SCRUBLET.out.versions
+            ch_versions = ch_versions.mix(SCANPY_SCRUBLET.out.versions)
         }
 
         if (methods.contains('doubletdetection')) {
@@ -55,7 +55,7 @@ workflow DOUBLET_DETECTION {
                 ch_h5ad
             )
             ch_predictions = ch_predictions.mix(DOUBLETDETECTION.out.predictions)
-            ch_versions = DOUBLETDETECTION.out.versions
+            ch_versions = ch_versions.mix(DOUBLETDETECTION.out.versions)
         }
 
         if (methods.contains('scdblfinder')) {

From 4b5b2b3fa1735bf7bf75042543626272519d610d Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 13:45:11 +0300
Subject: [PATCH 25/28] Remove redundant restoration of original cell barcodes
 in scDblFinder processing

---
 .../doublet_detection/scdblfinder/templates/scdblfinder.R | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index 21d526dd..190743ee 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -39,14 +39,6 @@ if (!is.na(dbr)) {
   )
 }
 
-# Restore the input barcodes because running scDblFinder on the just the assay matrix above can
-# return a new SCE whose column names no longer match the original AnnData cell IDs.
-# Keeping the original names is required so the output h5ad obs_names and CSV rows
-# still map back to the same cells seen by downstream steps.
-if (!is.null(original_cell_names) && length(original_cell_names) == ncol(sce)) {
-    colnames(sce) <- original_cell_names
-}
-
 # Generate a summary table
 message("scDblFinder results summary:")
 print(table(sce\$scDblFinder.class))

From ee1d883846a6148eeddeb2d33ba86ae92e5cd06e Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 14:13:08 +0300
Subject: [PATCH 26/28] Remove unnecessary comment about RNG seed parameter in
 scDblFinder script

---
 .../local/doublet_detection/scdblfinder/templates/scdblfinder.R  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index 190743ee..d930fefd 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -9,7 +9,6 @@ library(anndataR)
 adata <- read_h5ad("${h5ad}")
 sce <- adata\$as_SingleCellExperiment()
 
-# Set the param to a specified RNG seed for reproducibility
 num_threads <- max(1L, as.integer("${task.cpus}"))
 bp <- MulticoreParam(workers = num_threads, RNGseed = 123)
 

From a2fe8ab88721a0f890ea8c6ceab95bfacb647d24 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 14:20:41 +0300
Subject: [PATCH 27/28] Refactor doublet rate handling in scDblFinder to
 streamline logic and improve readability

---
 .../scdblfinder/templates/scdblfinder.R       | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
index d930fefd..50edc27c 100644
--- a/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
+++ b/modules/local/doublet_detection/scdblfinder/templates/scdblfinder.R
@@ -23,21 +23,19 @@ dbr <- suppressWarnings(as.numeric(dbr_raw))
 # Run scDblFinder on the counts matrix (first assay)
 # scDblFinder creates artificial doublets internally and returns a new SCE
 set.seed(123)
-if (!is.na(dbr)) {
-  message(paste0("Using provided doublet_rate (dbr): ", dbr))
-  sce <- scDblFinder(
-    assays(sce)[[1]],
-    BPPARAM = bp,
-    dbr = dbr
-  )
-} else {
+if (is.na(dbr)) {
   message("No valid doublet_rate provided; using scDblFinder internal dbr estimation")
-  sce <- scDblFinder(
-    assays(sce)[[1]],
-    BPPARAM = bp
-  )
+  dbr <- NULL
+} else {
+  message(paste0("Using provided doublet_rate (dbr): ", dbr))
 }
 
+sce <- scDblFinder(
+  assays(sce)[[1]],
+  BPPARAM = bp,
+  dbr = dbr
+)
+
 # Generate a summary table
 message("scDblFinder results summary:")
 print(table(sce\$scDblFinder.class))

From 6194ca488b26b5ae7e13664cf63258e950e02a93 Mon Sep 17 00:00:00 2001
From: KurayiChawatama <kurichawaz@gmail.com>
Date: Fri, 13 Mar 2026 14:32:30 +0300
Subject: [PATCH 28/28] Fix regex pattern for doublet detection tool options in
 nextflow_schema.json

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2233d8a6..545f8f8d 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -106,7 +106,7 @@
                     "default": "scrublet",
                     "description": "Specify the tools to use for doublet detection. Setting to 'none' will skip this step",
                     "help_text": "If you want to use multiple tools, separate them with a comma. Available methods are: solo, scrublet, doubletdetection, scds, scdblfinder",
-                    "pattern": "^(none|((solo|scrublet|doubletdetection|scds|scdblfinder)?,?)*[^,]+$)"
+                    "pattern": "^(none|(solo|scrublet|doubletdetection|scds|scdblfinder)(,(solo|scrublet|doubletdetection|scds|scdblfinder))*)$"
                 },
                 "doublet_detection_threshold": {
                     "type": "integer",