samhornstein · samhornstein · May 13, 2026 · May 13, 2026 · May 13, 2026 · May 15, 2026
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,6 @@ envs/amrplusplus-update.sif
 logs/
 .nextflow*
 nextflow-24.10.9-dist
+trace-*.txt
+wb/config/wb.env
+wb/config/gcp.env
diff --git a/bin/kraken2_long_to_wide_update.py b/bin/kraken2_long_to_wide_update.py
@@ -13,28 +13,30 @@
     'R1':0,
     'R2':1,
     'R3':2,
-    'K': 3,
-    'P': 4,
-    'C': 5,
-    'O': 6,
-    'F': 7,
-    'G': 8,
-    'S': 9,
-    'U': 10
+    'D': 3,
+    'K': 4,
+    'P': 5,
+    'C': 6,
+    'O': 7,
+    'F': 8,
+    'G': 9,
+    'S': 10,
+    'U': 11
 }
 
 taxa_level_names = {
     0: 'Root1',
     1: 'Root2',
     2: 'Root3',
-    3: 'Kingdom',
-    4: 'Phylum',
-    5: 'Class',
-    6: 'Order',
-    7: 'Family',
-    8: 'Genus',
-    9: 'Species',
-    10: 'Unclassified'
+    3: 'Domain',  # newly inserted rank; each index below is one higher than in the removed entries
+    4: 'Kingdom',
+    5: 'Phylum',
+    6: 'Class',
+    7: 'Order',
+    8: 'Family',
+    9: 'Genus',
+    10: 'Species',
+    11: 'Unclassified'  # last index moves from 10 to 11
 }
 
 

diff --git a/config/google_batch.config b/config/google_batch.config
@@ -10,30 +10,58 @@
  */
 
 process {
-    // Default for all processes
     cpus = 4
     memory = '16 GB'
     machineType = 'n2-standard-4'
 
     cache = 'lenient'
-    executor = 'google-batch'
 
     withName: 'runqc' {
         cpus = 16
-        memory = "64.GB"
-        machineType = "n2-standard-16"
+        memory = '64 GB'
+        machineType = 'n2-standard-16'
         containerOptions = '--env _JAVA_OPTIONS="-Xmx60g"'
     }
 
-    withName: 'bwa_align' {
+    withName: 'bowtie2_index' {
+        cpus = 8
+        memory = '64 GB'
+        machineType = 'n2-highmem-8'
+    }
+
+    withName: 'index' {  // same cpus / memory / machineType as 'bowtie2_index' above
+        cpus = 8
+        memory = '64 GB'
+        machineType = 'n2-highmem-8'
+    }
+
+    withName: 'bowtie2_align' {
+        cpus = 32
+        memory = '128 GB'
+        machineType = 'n2-standard-32'
+    }
+
+    withName: 'bowtie2_rm_contaminant_fq' {
         cpus = 32
-        memory = "256.GB"
-        machineType = "n2-highmem-32"
+        memory = '256 GB'
+        machineType = 'n2-highmem-32'
+    }
+
+    withName: 'bwa_align' {
+        cpus = 16
+        memory = '128 GB'
+        machineType = 'n2-highmem-32'  // NOTE(review): every other selector using n2-highmem-32 requests 256 GB; this one requests 16 CPUs / 128 GB - confirm the larger machine type is intended
     }
 
     withName: 'runkraken' {
         cpus = 16
-        memory = "256.GB"
-        machineType = "n2-highmem-32"
+        memory = '256 GB'
+        machineType = 'n2-highmem-32'
+    }
+
+    withName: 'runkrakenInterleaved' {
+        cpus = 16
+        memory = '256 GB'
+        machineType = 'n2-highmem-32'
     }
 }
diff --git a/docs/workbench.md b/docs/workbench.md
@@ -0,0 +1,140 @@
+# FloRes on Verily Workbench
+
+**Prerequisites**:
+- You must create a Workbench workspace where you have **ADMIN** permissions
+- All setup and execution must be done within this workspace
+
+## Dependencies
+
+- **Verily Workbench CLI** (`wb`) - Workbench command-line tool
+- **Google Cloud SDK** (`gcloud`) - GCP command-line tool
+- **Docker** - For building and pushing container images (must be running)
+- **Nextflow v24** - Workflow orchestration (installed in Workbench app)
+  - **Note**: v25 has breaking changes and is not compatible with this pipeline
+
+## Quick Start: Workbench Orchestration with Google Batch
+
+This guide walks through setting up and running FloRes with Workbench orchestration and Google Batch compute. The setup is split between local commands (for infrastructure) and Workbench app commands (for execution).
+
+### Step 1: Create Workspace and App
+
+Create a new workspace and app in the Workbench UI (or use the CLI if preferred).
+
+### Step 2: Local Setup
+
+Run these commands on your **local machine**:
+
+```bash
+# Set your active workspace (replace with your workspace ID)
+wb workspace set --id=your-workspace-id
+
+# Copy the Workbench environment template
+cp wb/config/wb.env.template wb/config/wb.env
+```
+
+Edit `wb/config/wb.env` and set the user-defined variables:
+- `GCS_BUCKET`: Your Workbench GCS bucket resource ID (e.g., `nf-output`)
+- `GCS_REF_BUCKET`: Bucket containing reference genomes (e.g., `referencegenomes-wb-my-workspace-1234`)
+- `GCS_BUCKET_LOCATION`: Region (default: `us-central1`)
+- `GOOGLE_ARTIFACT_REPO`: Your artifact registry repo (e.g., `nextflow-containers`)
+
+**Note**: Project IDs, service accounts, and registry paths are automatically determined from your `gcloud` and `wb` CLI configurations.
+
+Then run:
+
+```bash
+# Set up infrastructure (creates buckets, service accounts, etc.)
+./wb/setup_infra.sh wb
+
+# Upload input data and reference databases to GCS
+./wb/upload_data.sh wb
+
+# Build Docker image and push to Artifact Registry
+# NOTE: Docker must be running before executing this command
+./wb/build.sh --env wb --push
+```
+
+### Step 3: Workbench App Setup
+
+Open your Workbench app, launch the Terminal, and run:
+
+```bash
+# Clone the repository
+cd repos/ && git clone https://github.com/passdan/FloRes.git && cd FloRes/
+
+# Copy the environment template
+cp wb/config/wb.env.template wb/config/wb.env
+```
+
+Now copy the values from your local `wb/config/wb.env` into the `wb/config/wb.env` you just created in the Workbench app, so that both environments use the same settings.
+
+### Step 4: Run the Pipeline
+
+```bash
+./wb/run.sh --env wb
+```
+
+Results will be stored in your configured GCS bucket.
+
+**Known Issues**:
+- The `gcloud storage cp` command may not correctly resolve Workbench resource names to full `gs://` paths when running `upload_data.sh` or `run.sh`. If you encounter path resolution issues, manually specify the full GCS bucket path in your `wb.env` configuration.
+
+---
+
+## Alternative: Quick Demo in Workbench JupyterLab
+
+For a simple demonstration without Google Batch (both orchestration and execution running in the same Workbench app):
+
+Create a new Workbench workspace and add this git repository in the **Apps** tab.
+
+Create a JupyterLab app instance, launch it, and open the terminal:
+
+```bash
+# Initialize conda
+conda init
+source ~/.bashrc
+
+# Navigate to the repository
+cd repos/FloRes
+
+# Create and activate the conda environment
+conda env create -f envs/AMR++_env.yaml
+conda activate AMR++_env
+
+# Verify Nextflow version 24 is installed
+nextflow -v
+
+# Run the test pipeline (takes ~5 minutes)
+nextflow run main_AMR++.nf
+```
+
+Expected output: results in `~/repos/FloRes/test_results`
+
+---
+
+## Configuration
+
+### Resource Scaling on Google Batch
+
+Google Batch does NOT automatically scale machine types based on CPU/memory requests. Resource scaling is configured in `config/google_batch.config`.
+
+Each process that needs more than default resources must explicitly specify a matching `machineType`. Current resource allocations (`bowtie2_index` and `index`, not listed in the table, each use 8 CPUs and 64 GB on n2-highmem-8):
+
+| Process | CPUs | Memory | Machine Type |
+|---------|------|--------|-------------|
+| Default | 4 | 16 GB | n2-standard-4 |
+| runqc | 16 | 64 GB | n2-standard-16 |
+| bowtie2_align | 32 | 128 GB | n2-standard-32 |
+| bowtie2_rm_contaminant_fq | 32 | 256 GB | n2-highmem-32 |
+| bwa_align | 16 | 128 GB | n2-highmem-32 |
+| runkraken | 16 | 256 GB | n2-highmem-32 |
+| runkrakenInterleaved | 16 | 256 GB | n2-highmem-32 |
+
+### Supporting Environments
+
+**Local** (testing): `./wb/run.sh --env local`
+- Requires Docker and Conda
+
+**GCP** (debugging): `./wb/run.sh --env gcp`
+- For debugging Google Batch jobs with visible logs
+- Requires `gcloud` CLI and Docker
diff --git a/envs/containers/Dockerfile b/envs/containers/Dockerfile
@@ -20,6 +20,7 @@ RUN apt-get update -q && \
         subversion \
         wget \
         g++ \
+        make \
         libarchive13 \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
@@ -59,5 +60,19 @@ RUN set -x && \
     find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
     /opt/conda/bin/conda clean -afy && \
     /opt/conda/bin/conda install -c conda-forge mamba && \
-    /opt/conda/bin/mamba install -c conda-forge -c bioconda git python=3.9 trimmomatic multiqc bwa samtools bedtools kraken2 multiqc fastqc krona bracken numpy pysam pandas biopython matplotlib nextflow && \
+    /opt/conda/bin/mamba install -c conda-forge -c bioconda git python=3.9 trimmomatic multiqc bwa samtools bedtools kraken2 multiqc fastqc krona bracken numpy pysam pandas biopython matplotlib nextflow=24 fastp bowtie2 && \
     conda clean --all
+
+# Copy the AMR++ bin scripts into the container at /opt/amrplusplus/bin, then mark the *.py scripts and the rarefaction and resistome entries as executable
+COPY bin /opt/amrplusplus/bin
+RUN chmod +x /opt/amrplusplus/bin/*.py && \
+    chmod +x /opt/amrplusplus/bin/rarefaction && \
+    chmod +x /opt/amrplusplus/bin/resistome
+
+# Clone AmrPlusPlus_SNP for SNP verification (replaces empty dir copied from bin/). NOTE(review): the clone names no tag or commit, so the image contents follow the upstream default branch at build time - consider pinning
+RUN rm -rf /opt/amrplusplus/bin/AmrPlusPlus_SNP && \
+    git clone https://github.com/Isabella136/AmrPlusPlus_SNP.git /opt/amrplusplus/bin/AmrPlusPlus_SNP && \
+    chmod -R 755 /opt/amrplusplus/bin/AmrPlusPlus_SNP
+
+# Prepend /opt/amrplusplus/bin to PATH so the copied scripts can be invoked by name
+ENV PATH="/opt/amrplusplus/bin:${PATH}"
diff --git a/modules/Alignment/bowtie2-for_AMRplusplus.nf b/modules/Alignment/bowtie2-for_AMRplusplus.nf
@@ -145,6 +145,6 @@ process HostRemovalStats {
         path("host.removal.stats"), emit: combo_host_rm_stats
 
     """
-    ${PYTHON3} $baseDir/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
+    ${PYTHON3} /opt/amrplusplus/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
     """
 }
diff --git a/modules/Alignment/bwa.nf b/modules/Alignment/bwa.nf
@@ -63,22 +63,22 @@ process bwa_align {
     script:
     if( deduped == "N")
         """
-        ${BWA} mem ${indexfiles[0]} ${reads} -t ${threads} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' | \
-        ${SAMTOOLS} sort -@ ${threads} -m 4G -o ${pair_id}_alignment_sorted.bam
+        ${BWA} mem ${indexfiles[0]} ${reads} -t ${task.cpus} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' | \
+        ${SAMTOOLS} sort -@ ${task.cpus} -m 4G -o ${pair_id}_alignment_sorted.bam
         """
     else if( deduped == "Y")
         """
-        ${BWA} mem ${indexfiles[0]} ${reads} -t ${threads} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' > ${pair_id}_alignment.sam
-        ${SAMTOOLS} view -@ ${threads} -S -b ${pair_id}_alignment.sam > ${pair_id}_alignment.bam
+        ${BWA} mem ${indexfiles[0]} ${reads} -t ${task.cpus} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' > ${pair_id}_alignment.sam
+        ${SAMTOOLS} view -@ ${task.cpus} -S -b ${pair_id}_alignment.sam > ${pair_id}_alignment.bam
         rm ${pair_id}_alignment.sam
-        ${SAMTOOLS} sort -@ ${threads} -m 3G -n ${pair_id}_alignment.bam -o ${pair_id}_alignment_sorted.bam
+        ${SAMTOOLS} sort -@ ${task.cpus} -m 3G -n ${pair_id}_alignment.bam -o ${pair_id}_alignment_sorted.bam
         rm ${pair_id}_alignment.bam
-        ${SAMTOOLS} fixmate -@ ${threads} ${pair_id}_alignment_sorted.bam ${pair_id}_alignment_sorted_fix.bam
-        ${SAMTOOLS} sort -@ ${threads} -m 3G ${pair_id}_alignment_sorted_fix.bam -o ${pair_id}_alignment_sorted_fix.sorted.bam
+        ${SAMTOOLS} fixmate -@ ${task.cpus} ${pair_id}_alignment_sorted.bam ${pair_id}_alignment_sorted_fix.bam
+        ${SAMTOOLS} sort -@ ${task.cpus} -m 3G ${pair_id}_alignment_sorted_fix.bam -o ${pair_id}_alignment_sorted_fix.sorted.bam
         rm ${pair_id}_alignment_sorted_fix.bam
         ${SAMTOOLS} rmdup -S ${pair_id}_alignment_sorted_fix.sorted.bam ${pair_id}_alignment_dedup.bam
         rm ${pair_id}_alignment_sorted_fix.sorted.bam
-        ${SAMTOOLS} view -@ ${threads} -h -o ${pair_id}_alignment_dedup.sam ${pair_id}_alignment_dedup.bam
+        ${SAMTOOLS} view -@ ${task.cpus} -h -o ${pair_id}_alignment_dedup.sam ${pair_id}_alignment_dedup.bam
         rm ${pair_id}_alignment_dedup.sam
         """
     else
@@ -107,13 +107,13 @@ process bwa_rm_contaminant_fq {
     path("${pair_id}.samtools.idxstats"), emit: host_rm_stats
 
     """
-    ${BWA} mem ${indexfiles[0]} ${reads[0]} ${reads[1]} -t ${threads} | \
-    ${SAMTOOLS} sort -@ ${threads} -m 4G -o ${pair_id}.host.sorted.bam
+    ${BWA} mem ${indexfiles[0]} ${reads[0]} ${reads[1]} -t ${task.cpus} | \
+    ${SAMTOOLS} sort -@ ${task.cpus} -m 4G -o ${pair_id}.host.sorted.bam
     ${SAMTOOLS} index ${pair_id}.host.sorted.bam && ${SAMTOOLS} idxstats ${pair_id}.host.sorted.bam > ${pair_id}.samtools.idxstats
     ${SAMTOOLS} view -h -f 12 -b ${pair_id}.host.sorted.bam -o ${pair_id}.host.sorted.removed.bam
-    ${SAMTOOLS} sort -n -@ ${threads} -m 3G ${pair_id}.host.sorted.removed.bam -o ${pair_id}.host.resorted.removed.bam
+    ${SAMTOOLS} sort -n -@ ${task.cpus} -m 3G ${pair_id}.host.sorted.removed.bam -o ${pair_id}.host.resorted.removed.bam
     ${SAMTOOLS}  \
-       fastq -@ ${threads} -c 6  \
+       fastq -@ ${task.cpus} -c 6  \
       ${pair_id}.host.resorted.removed.bam \
       -1 ${pair_id}.non.host.R1.fastq.gz \
       -2 ${pair_id}.non.host.R2.fastq.gz \
@@ -143,6 +143,6 @@ process HostRemovalStats {
         path("host.removal.stats"), emit: combo_host_rm_stats
 
     """
-    ${PYTHON3} $baseDir/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
+    ${PYTHON3} /opt/amrplusplus/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
     """
 }
diff --git a/modules/Fastqc/fastqc.nf b/modules/Fastqc/fastqc.nf
@@ -51,7 +51,8 @@ process multiqc {
     script:
     """
     cp $config/* .
-    multiqc -v data* --interactive -f --cl-config "max_table_rows: 3000"
-    mv multiqc_data/multiqc_general_stats.txt .
+    multiqc -v data* --interactive -f --cl-config "max_table_rows: 3000" --outdir multiqc_data --filename multiqc_report.html
+    mv multiqc_data/multiqc_report_data/multiqc_general_stats.txt .
+    mv multiqc_data/multiqc_report.html .
     """
 }
diff --git a/modules/Microbiome/kraken2.nf b/modules/Microbiome/kraken2.nf
@@ -78,7 +78,7 @@ process krakenresults {
         path("kraken_analytic_matrix.csv")
 
     """
-    ${PYTHON3} $baseDir/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
+    ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
     """
 }
 

diff --git a/modules/Microbiome/kraken_and_bracken.nf b/modules/Microbiome/kraken_and_bracken.nf
@@ -104,13 +104,14 @@ process krakenresults {
 
 
     """
-    ${PYTHON3} $baseDir/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
+    ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
     """
 }
 
 process runbracken {
     label "microbiome"
-
+    errorStrategy { task.exitStatus == 1 ? 'ignore' : 'terminate' }
+
     input:
        tuple val(sample_id), path(kraken_report), val(level)
        path(krakendb)
-Original file line number
+Diff line change
@@ Expand Up / @@ -78,7 +78,7 @@ process krakenresults { @@
             path("kraken_analytic_matrix.csv")
         """
-        ${PYTHON3} $baseDir/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
+        ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
         """
     }
@@ Expand Down @@