From 444a508f818a0a8aa9ceb443aadb667a58b1dbfc Mon Sep 17 00:00:00 2001
From: Samuel Hornstein <samh@verily.com>
Date: Wed, 13 May 2026 13:00:31 -0700
Subject: [PATCH 1/7] fix: enable workbench execution with correct profile and
 cloud params

Fixes several issues preventing FloRes from running on Verily Workbench:
- wb/run.sh now passes -profile and -c flags to nextflow
- Adds params_google_batch.config with all gs:// paths for cloud execution
- Restores params.config to local-only defaults (no hardcoded google-batch executor)
- Moves process resource declarations into config/google_batch.config
- Parameterizes hardcoded bucket names with GCS_REF_BUCKET env var
---
 config/google_batch.config | 34 ++++++++++----
 nextflow.config            |  4 +-
 params.config              | 63 +++-----------------------
 params_google_batch.config | 92 ++++++++++++++++++++++++++++++++++++++
 wb/config/wb.env           |  2 +-
 wb/config/wb.env.template  |  7 ++-
 wb/run.sh                  |  3 +-
 7 files changed, 134 insertions(+), 71 deletions(-)
 create mode 100644 params_google_batch.config

diff --git a/config/google_batch.config b/config/google_batch.config
index e51154e..0457262 100644
--- a/config/google_batch.config
+++ b/config/google_batch.config
@@ -10,30 +10,46 @@
  */
 
 process {
-    // Default for all processes
     cpus = 4
     memory = '16 GB'
     machineType = 'n2-standard-4'
 
     cache = 'lenient'
-    executor = 'google-batch'
 
     withName: 'runqc' {
         cpus = 16
-        memory = "64.GB"
-        machineType = "n2-standard-16"
+        memory = '64 GB'
+        machineType = 'n2-standard-16'
         containerOptions = '--env _JAVA_OPTIONS="-Xmx60g"'
     }
 
-    withName: 'bwa_align' {
+    withName: 'bowtie2_align' {
+        cpus = 32
+        memory = '128 GB'
+        machineType = 'n2-standard-32'
+    }
+
+    withName: 'bowtie2_rm_contaminant_fq' {
         cpus = 32
-        memory = "256.GB"
-        machineType = "n2-highmem-32"
+        memory = '256 GB'
+        machineType = 'n2-highmem-32'
+    }
+
+    withName: 'bwa_align' {
+        cpus = 16
+        memory = '128 GB'
+        machineType = 'n2-highmem-32'
     }
 
     withName: 'runkraken' {
         cpus = 16
-        memory = "256.GB"
-        machineType = "n2-highmem-32"
+        memory = '256 GB'
+        machineType = 'n2-highmem-32'
+    }
+
+    withName: 'runkrakenInterleaved' {
+        cpus = 16
+        memory = '256 GB'
+        machineType = 'n2-highmem-32'
     }
 }
diff --git a/nextflow.config b/nextflow.config
index bb06a4d..785fa3d 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -80,7 +80,7 @@ profiles {
     process.maxRetries = 2
 
     workDir = "gs://${GCS_BUCKET}/scratch"
-    refDir = "gs://referencegenomes-wb-mighty-tangerine-1678"
+    refDir = "gs://${GCS_REF_BUCKET}"
 
     google.region  = "${GCS_BUCKET_LOCATION}"
     google.project = "$GOOGLE_CLOUD_PROJECT"
@@ -104,7 +104,7 @@ profiles {
     process.maxRetries = 5
 
     workDir = "gs://${GCS_BUCKET}/scratch"
-    params.refDir = "gs://referencegenomes-wb-mighty-tangerine-1678"
+    params.refDir = "gs://${GCS_REF_BUCKET}"
 
     google.region  = "${GCS_BUCKET_LOCATION}"
     google.project = "$GOOGLE_CLOUD_PROJECT"
diff --git a/params.config b/params.config
index 9c8aa37..ad563bf 100755
--- a/params.config
+++ b/params.config
@@ -14,10 +14,10 @@ params {
     // -----------------------------------------------------------------
 
     /* Location of forward and reverse read pairs */
-    reads = "gs://{GCS_BUCKET}/rawdata-wb-farms/S358_MiSeq_BHWNTNDRX5/fastq/C1*{1,2}.f*q.gz"
+    reads = "${baseDir}/data/raw/*_R{1,2}.fastq.gz"
 
     /* Output directory */
-    output = "gs://${GCS_BUCKET}/results"
+    output = "test_results"
 
     // -----------------------------------------------------------------
     //  Reference Databases
@@ -28,13 +28,13 @@ params {
     split = ""
 
     /* Location of reference/host genome */
-    host = "gs://${GCS_REF_BUCKET}/grch38_1kgmaj.fa"
+    host = "${baseDir}/data/host/chr21.fasta.gz"
 
     /* Optionally, you can specify the location of the host index files created with bwa with the path and wildcard (*): */
-    host_index = null
-    
-    /* Kraken database location, default is "null" and will download minikraken db */   
-    kraken_db = "gs://${GCS_REF_BUCKET}/minikraken_8GB_20200312"
+    host_index = "${baseDir}/data/host/chr21.fasta.gz*"
+
+    /* Kraken database location, default is "null" and will download minikraken db */
+    kraken_db = null
 
     /* Location of amr index files with wildcard */
     /* If you want the bowtie indexes built, use the bareword "null" */
@@ -105,55 +105,6 @@ params {
     dada2_db = "$baseDir/data/qiime/gg-13-8-99-515-806-nb-classifier.qza"
 }
 
-    // -----------------------------------------------------------------
-    //  default step resource requirements
-    // -----------------------------------------------------------------
-
-
-process {
-    // Stage resource usages
-    // See config for singularity install details
-    cache = 'lenient'
-    executor = 'google-batch'
-
-    withName: 'runqc' {
-        cpus = 16
-        memory = "64.GB"
-        machineType = "n2-standard-16"
-        containerOptions = '--env _JAVA_OPTIONS="-Xmx60g"'
-    }
-
-    withName: 'bowtie2_align' {
-        cpus = 32
-        memory = "128.GB"
-    }
-
-    withName: 'bowtie2_rm_contaminant_fq' {
-        cpus = 32
-        memory = "256.GB"
-        machineType = "n2-highmem-32"
-    }
-
-    withName: 'bwa_align' {
-        cpus = 16
-	    memory = "128.GB"
-        machineType = "n2-highmem-32"
-    }
-    
-    withName: 'runkraken' {
-        cpus = 16
-        memory = "256.GB"
-        machineType = "n2-highmem-32"
-    }
-
-    withName: 'runkrakenInterleaved' {
-        cpus = 16
-        memory = "256.GB"
-        machineType = "n2-highmem-32"
-    }
-}
-
-
 // The location of each dependency binary needs to be specified here.
 // The examples listed below are assuming the tools are already in the $PATH, however,
 // the absolute path to each tool can be entered individually.
diff --git a/params_google_batch.config b/params_google_batch.config
new file mode 100644
index 0000000..ce1989d
--- /dev/null
+++ b/params_google_batch.config
@@ -0,0 +1,92 @@
+/*
+ * Google Batch Parameter Configuration
+ * All file paths use gs:// for cloud execution via Google Batch.
+ * Bucket names are read from the GCS_BUCKET and GCS_REF_BUCKET environment variables.
+ */
+
+def gcs_bucket = System.getenv("GCS_BUCKET") ?: "nf-files"  // data/output bucket name; falls back to "nf-files" when the variable is unset or empty
+def gcs_ref_bucket = System.getenv("GCS_REF_BUCKET") ?: gcs_bucket  // reference-data bucket name; reuses gcs_bucket when the variable is unset or empty
+
+params {  // Pipeline parameters for cloud runs; every path below is a gs:// URI built from gcs_bucket or gcs_ref_bucket.
+    help = false
+
+    // -----------------------------------------------------------------
+    //  Input Data
+    // -----------------------------------------------------------------
+    reads = "gs://${gcs_bucket}/rawdata-wb-farms/S358_MiSeq_BHWNTNDRX5/fastq/C1*{1,2}.f*q.gz"  // glob matching the forward and reverse read pairs
+    bam_files = null
+    split = ""
+
+    // -----------------------------------------------------------------
+    //  Reference Databases
+    // -----------------------------------------------------------------
+    host = "gs://${gcs_ref_bucket}/grch38_1kgmaj.fa"  // reference/host genome, read from the reference bucket
+    host_index = null  // optional location (path plus wildcard) of host index files created with bwa; null = none supplied
+
+    kraken_db = "gs://${gcs_ref_bucket}/minikraken_8GB_20200312"  // Kraken database location, read from the reference bucket
+
+    amr_index = null  // amr index files; null asks for the indexes to be built
+    amr = "gs://${gcs_bucket}/data/amr/megares_database_v3.fasta"
+    annotation = "gs://${gcs_bucket}/data/amr/megares_annotations_v3.00.csv"
+
+    // -----------------------------------------------------------------
+    //  Output
+    // -----------------------------------------------------------------
+    output = "gs://${gcs_bucket}/results"  // output directory
+
+    // -----------------------------------------------------------------
+    //  Pipeline Logic & Analysis Toggles
+    // -----------------------------------------------------------------
+    snp = "Y"
+    deduped = "N"
+    prefix = "AMR"
+    threads = 8
+
+    // -----------------------------------------------------------------
+    //  Trimming Parameters
+    // -----------------------------------------------------------------
+    adapters = "gs://${gcs_bucket}/data/adapters/nextera.fa"
+    leading = 3
+    trailing = 3
+    slidingwindow = "4:15"
+    minlen = 36
+
+    // -----------------------------------------------------------------
+    //  Resistome Analysis Parameters
+    // -----------------------------------------------------------------
+    threshold = 80
+    min = 5
+    max = 100
+    skip = 5
+    samples = 1
+
+    // -----------------------------------------------------------------
+    //  Other Tools
+    // -----------------------------------------------------------------
+    multiqc = "gs://${gcs_bucket}/data/multiqc"
+
+    p_trim_left_f = 25
+    p_trim_left_r = 26
+    p_trunc_len_f = 225
+    p_trunc_len_r = 220
+
+    taxlevel = "R1,R2,R3,K,P,C,O,F,G,S"
+
+    dada2_db = "gs://${gcs_bucket}/data/qiime/gg-13-8-99-515-806-nb-classifier.qza"
+}
+
+env {  // Location of each dependency binary; bare names assume the tool is already on the PATH (an absolute path may be given instead).
+    JAVA = "java"
+    TRIMMOMATIC = "trimmomatic"
+    FASTP = "fastp"
+    PYTHON3 = "python3"
+    BWA = "bwa"
+    BOWTIE2 = "bowtie2"
+    SAMTOOLS = "samtools"
+    BEDTOOLS = "bedtools"
+    RESISTOME = "resistome"
+    RAREFACTION = "rarefaction"
+    SNPFINDER = "snpfinder"
+    KRAKEN2 = "kraken2"
+    QIIME = "qiime"
+}
diff --git a/wb/config/wb.env b/wb/config/wb.env
index c4e4ea6..53b69aa 100644
--- a/wb/config/wb.env
+++ b/wb/config/wb.env
@@ -41,4 +41,4 @@ REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GO
 
 # Nextflow profile and config
 NEXTFLOW_PROFILE="workbench"
-NEXTFLOW_CONFIG="params.config"
+NEXTFLOW_CONFIG="params_google_batch.config"
diff --git a/wb/config/wb.env.template b/wb/config/wb.env.template
index 3503d55..4c807a1 100644
--- a/wb/config/wb.env.template
+++ b/wb/config/wb.env.template
@@ -12,6 +12,11 @@
 # Note: Use the resource ID, not the full GCS bucket name
 export GCS_BUCKET=<YOUR_BUCKET_ID>
 
+# GCS reference data bucket
+# Replace <YOUR_REF_BUCKET_ID> with the bucket containing reference genomes
+# Example: "referencegenomes-wb-my-workspace-1234"
+export GCS_REF_BUCKET=<YOUR_REF_BUCKET_ID>
+
 # GCS bucket location/region
 # Common values: us-central1, us-east1, europe-west1
 export GCS_BUCKET_LOCATION=us-central1
@@ -40,4 +45,4 @@ REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GO
 
 # Nextflow profile and config
 NEXTFLOW_PROFILE="workbench"
-NEXTFLOW_CONFIG="params_google-batch.config"
+NEXTFLOW_CONFIG="params_google_batch.config"
diff --git a/wb/run.sh b/wb/run.sh
index 67580fe..27ee8bf 100755
--- a/wb/run.sh
+++ b/wb/run.sh
@@ -132,5 +132,4 @@ fi
 now=$(date +"%Y-%m-%d--%H-%M")
 
 # Run nextflow with Google Batch profile
-#nextflow -c "${NEXTFLOW_CONFIG}" run main_AMR++.nf --pipeline "standard_AMR_wKraken_and_Bracken" -profile "${NEXTFLOW_PROFILE}" -with-trace "trace-${now}.txt"   -resume -bg
-nextflow run main_AMR++.nf --pipeline "standard_AMR_wKraken_and_Bracken" -with-trace "trace-${now}.txt"   
+nextflow run main_AMR++.nf -profile "${NEXTFLOW_PROFILE}" -c "${NEXTFLOW_CONFIG}" --pipeline "standard_AMR_wKraken_and_Bracken" -with-trace "trace-${now}.txt"

From 4abcc98175e86e94b1da34e14b35e5ca31984e9a Mon Sep 17 00:00:00 2001
From: Samuel Hornstein <samh@verily.com>
Date: Wed, 13 May 2026 13:04:51 -0700
Subject: [PATCH 2/7] docs: add Workbench quick start guide

---
 docs/workbench.md | 140 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 docs/workbench.md

diff --git a/docs/workbench.md b/docs/workbench.md
new file mode 100644
index 0000000..59fe718
--- /dev/null
+++ b/docs/workbench.md
@@ -0,0 +1,140 @@
+# FloRes on Verily Workbench
+
+**Prerequisites**:
+- You must create a Workbench workspace where you have **ADMIN** permissions
+- All setup and execution must be done within this workspace
+
+## Dependencies
+
+- **Verily Workbench CLI** (`wb`) - Workbench command-line tool
+- **Google Cloud SDK** (`gcloud`) - GCP command-line tool
+- **Docker** - For building and pushing container images (must be running)
+- **Nextflow v24** - Workflow orchestration (installed in Workbench app)
+  - **Note**: v25 has breaking changes and is not compatible with this pipeline
+
+## Quick Start: Workbench Orchestration with Google Batch
+
+This guide walks through setting up and running FloRes with Workbench orchestration and Google Batch compute. The setup is split between local commands (for infrastructure) and Workbench app commands (for execution).
+
+### Step 1: Create Workspace and App
+
+Create a new workspace and app in the Workbench UI (or use the CLI if preferred).
+
+### Step 2: Local Setup
+
+Run these commands on your **local machine**:
+
+```bash
+# Set your active workspace (replace with your workspace ID)
+wb workspace set --id=your-workspace-id
+
+# Copy the Workbench environment template
+cp wb/config/wb.env.template wb/config/wb.env
+```
+
+Edit `wb/config/wb.env` and set the user-defined variables:
+- `GCS_BUCKET`: Your Workbench GCS bucket resource ID (e.g., `nf-output`)
+- `GCS_REF_BUCKET`: Bucket containing reference genomes (e.g., `referencegenomes-wb-my-workspace-1234`)
+- `GCS_BUCKET_LOCATION`: Region (default: `us-central1`)
+- `GOOGLE_ARTIFACT_REPO`: Your artifact registry repo (e.g., `nextflow-containers`)
+
+**Note**: Project IDs, service accounts, and registry paths are automatically determined from your `gcloud` and `wb` CLI configurations.
+
+Then run:
+
+```bash
+# Set up infrastructure (creates buckets, service accounts, etc.)
+./wb/setup_infra.sh wb
+
+# Upload input data and reference databases to GCS
+./wb/upload_data.sh wb
+
+# Build Docker image and push to Artifact Registry
+# NOTE: Docker must be running before executing this command
+./wb/build.sh --env wb --push
+```
+
+### Step 3: Workbench App Setup
+
+Open your Workbench app, launch the Terminal, and run:
+
+```bash
+# Clone the repository
+cd repos/ && git clone https://github.com/passdan/FloRes.git && cd FloRes/
+
+# Copy the environment template
+cp wb/config/wb.env.template wb/config/wb.env
+```
+
+Now copy the values from your local `wb/config/wb.env` into the `wb/config/wb.env` you just created in the Workbench app, so that both environments use the same configuration.
+
+### Step 4: Run the Pipeline
+
+```bash
+./wb/run.sh --env wb
+```
+
+Results are written to `gs://<GCS_BUCKET>/results`, the `output` location set in `params_google_batch.config`.
+
+**Known Issues**:
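+- The `gcloud storage cp` command may not correctly resolve Workbench resource names to full `gs://` paths when running `upload_data.sh` or `run.sh`. If you encounter path resolution issues, set `GCS_BUCKET` and `GCS_REF_BUCKET` in `wb.env` to the full bucket names (for example `referencegenomes-wb-my-workspace-1234`), without a `gs://` prefix, because the Nextflow configuration adds that prefix itself.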
+
+---
+
+## Alternative: Quick Demo in Workbench JupyterLab
+
+For a simple demonstration without Google Batch (both orchestration and execution running in the same Workbench app):
+
+Create a new Workbench workspace and add this git repository in the **Apps** tab.
+
+Create a JupyterLab app instance, launch it, and open the terminal:
+
+```bash
+# Initialize conda
+conda init
+source ~/.bashrc
+
+# Navigate to the repository
+cd repos/FloRes
+
+# Create and activate the conda environment
+conda env create -f envs/AMR++_env.yaml
+conda activate AMR++_env
+
+# Verify Nextflow version 24 is installed
+nextflow -v
+
+# Run the test pipeline (takes ~5 minutes)
+nextflow run main_AMR++.nf
+```
+
+Expected output: results in `~/repos/FloRes/test_results`
+
+---
+
+## Configuration
+
+### Resource Scaling on Google Batch
+
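+Because `config/google_batch.config` sets an explicit default `machineType`, raising a process's CPU or memory request does NOT by itself move it to a larger machine. Resource scaling is therefore configured per process in `config/google_batch.config`.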
+
+Each process that needs more than default resources must explicitly specify a matching `machineType`. Current resource allocations:
+
+| Process | CPUs | Memory | Machine Type |
+|---------|------|--------|-------------|
+| Default | 4 | 16 GB | n2-standard-4 |
+| runqc | 16 | 64 GB | n2-standard-16 |
+| bowtie2_align | 32 | 128 GB | n2-standard-32 |
+| bowtie2_rm_contaminant_fq | 32 | 256 GB | n2-highmem-32 |
+| bwa_align | 16 | 128 GB | n2-highmem-32 |
+| runkraken | 16 | 256 GB | n2-highmem-32 |
+| runkrakenInterleaved | 16 | 256 GB | n2-highmem-32 |
+
+### Supporting Environments
+
+**Local** (testing): `./wb/run.sh --env local`
+- Requires Docker and Conda
+
+**GCP** (debugging): `./wb/run.sh --env gcp`
+- For debugging Google Batch jobs with visible logs
+- Requires `gcloud` CLI and Docker

From 797f0747f53e35297606e69f2778a228686a9167 Mon Sep 17 00:00:00 2001
From: Samuel Hornstein <samh@verily.com>
Date: Wed, 13 May 2026 14:36:45 -0700
Subject: [PATCH 3/7] chore: update wb.env to current workspace buckets

---
 wb/config/wb.env | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/wb/config/wb.env b/wb/config/wb.env
index 53b69aa..69c153b 100644
--- a/wb/config/wb.env
+++ b/wb/config/wb.env
@@ -10,12 +10,12 @@
 # Replace <YOUR_BUCKET_ID> with your Workbench GCS bucket resource ID
 # Example: "nf-output" or "my-pipeline-data"
 # Note: Use the resource ID, not the full GCS bucket name
-export GCS_BUCKET=wb-mighty-tangerine-1678
-export GCS_REF_BUCKET=referencegenomes-wb-mighty-tangerine-1678
+export GCS_BUCKET=nf-output-wb-cagey-coconut-8353
+export GCS_REF_BUCKET=reference-genomes-wb-cagey-coconut-8353
 
 # GCS bucket location/region
 # Common values: us-central1, us-east1, europe-west1
-export GCS_BUCKET_LOCATION=europe-west2
+export GCS_BUCKET_LOCATION=us-central1
 
 # Google Artifact Registry repository name
 # Replace <YOUR_ARTIFACT_REPO> with your artifact registry repository name

From e995d10083e0f05c1290d6bd9970aa50f48466a7 Mon Sep 17 00:00:00 2001
From: Samuel Hornstein <samh@verily.com>
Date: Thu, 14 May 2026 20:57:26 -0700
Subject: [PATCH 4/7] fix: bundle bin/ into Docker and use container paths for
 workbench execution

Apply learnings from AMR workbench conversion: add fastp/bowtie2/make to
container, pin nextflow=24, COPY bin/ to /opt/amrplusplus/bin, replace all
$baseDir/bin/ refs with container paths, and use ${task.cpus} instead of
${threads} in bwa/trimmomatic modules.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 envs/containers/Dockerfile                   | 17 ++++++++++++-
 modules/Alignment/bowtie2-for_AMRplusplus.nf |  2 +-
 modules/Alignment/bwa.nf                     | 26 ++++++++++----------
 modules/Microbiome/kraken2.nf                |  2 +-
 modules/Microbiome/kraken_and_bracken.nf     |  2 +-
 modules/Resistome/resistome.nf               | 12 ++++-----
 modules/Trimming/trimmomatic.nf              |  4 +--
 7 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/envs/containers/Dockerfile b/envs/containers/Dockerfile
index 12b2a90..ba8d795 100755
--- a/envs/containers/Dockerfile
+++ b/envs/containers/Dockerfile
@@ -20,6 +20,7 @@ RUN apt-get update -q && \
         subversion \
         wget \
         g++ \
+        make \
         libarchive13 \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
@@ -59,5 +60,19 @@ RUN set -x && \
     find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
     /opt/conda/bin/conda clean -afy && \
     /opt/conda/bin/conda install -c conda-forge mamba && \
-    /opt/conda/bin/mamba install -c conda-forge -c bioconda git python=3.9 trimmomatic multiqc bwa samtools bedtools kraken2 multiqc fastqc krona bracken numpy pysam pandas biopython matplotlib nextflow && \
+    /opt/conda/bin/mamba install -c conda-forge -c bioconda git python=3.9 trimmomatic multiqc bwa samtools bedtools kraken2 multiqc fastqc krona bracken numpy pysam pandas biopython matplotlib nextflow=24 fastp bowtie2 && \
     conda clean --all
+
+# Copy AMR++ bin scripts into the container
+COPY bin /opt/amrplusplus/bin
+RUN chmod +x /opt/amrplusplus/bin/*.py && \
+    chmod +x /opt/amrplusplus/bin/rarefaction && \
+    chmod +x /opt/amrplusplus/bin/resistome
+
+# Clone AmrPlusPlus_SNP for SNP verification (replaces empty dir copied from bin/)
+RUN rm -rf /opt/amrplusplus/bin/AmrPlusPlus_SNP && \
+    git clone https://github.com/Isabella136/AmrPlusPlus_SNP.git /opt/amrplusplus/bin/AmrPlusPlus_SNP && \
+    chmod -R 755 /opt/amrplusplus/bin/AmrPlusPlus_SNP
+
+# Add bin directory to PATH
+ENV PATH="/opt/amrplusplus/bin:${PATH}"
diff --git a/modules/Alignment/bowtie2-for_AMRplusplus.nf b/modules/Alignment/bowtie2-for_AMRplusplus.nf
index f7b8dfc..f026f9a 100755
--- a/modules/Alignment/bowtie2-for_AMRplusplus.nf
+++ b/modules/Alignment/bowtie2-for_AMRplusplus.nf
@@ -145,6 +145,6 @@ process HostRemovalStats {
         path("host.removal.stats"), emit: combo_host_rm_stats
 
     """
-    ${PYTHON3} $baseDir/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
+    ${PYTHON3} /opt/amrplusplus/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
     """
 }
diff --git a/modules/Alignment/bwa.nf b/modules/Alignment/bwa.nf
index 1617558..cf0a59f 100755
--- a/modules/Alignment/bwa.nf
+++ b/modules/Alignment/bwa.nf
@@ -63,22 +63,22 @@ process bwa_align {
     script:
     if( deduped == "N")
         """
-        ${BWA} mem ${indexfiles[0]} ${reads} -t ${threads} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' | \
-        ${SAMTOOLS} sort -@ ${threads} -m 4G -o ${pair_id}_alignment_sorted.bam
+        ${BWA} mem ${indexfiles[0]} ${reads} -t ${task.cpus} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' | \
+        ${SAMTOOLS} sort -@ ${task.cpus} -m 4G -o ${pair_id}_alignment_sorted.bam
         """
     else if( deduped == "Y")
         """
-        ${BWA} mem ${indexfiles[0]} ${reads} -t ${threads} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' > ${pair_id}_alignment.sam
-        ${SAMTOOLS} view -@ ${threads} -S -b ${pair_id}_alignment.sam > ${pair_id}_alignment.bam
+        ${BWA} mem ${indexfiles[0]} ${reads} -t ${task.cpus} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' > ${pair_id}_alignment.sam
+        ${SAMTOOLS} view -@ ${task.cpus} -S -b ${pair_id}_alignment.sam > ${pair_id}_alignment.bam
         rm ${pair_id}_alignment.sam
-        ${SAMTOOLS} sort -@ ${threads} -m 3G -n ${pair_id}_alignment.bam -o ${pair_id}_alignment_sorted.bam
+        ${SAMTOOLS} sort -@ ${task.cpus} -m 3G -n ${pair_id}_alignment.bam -o ${pair_id}_alignment_sorted.bam
         rm ${pair_id}_alignment.bam
-        ${SAMTOOLS} fixmate -@ ${threads} ${pair_id}_alignment_sorted.bam ${pair_id}_alignment_sorted_fix.bam
-        ${SAMTOOLS} sort -@ ${threads} -m 3G ${pair_id}_alignment_sorted_fix.bam -o ${pair_id}_alignment_sorted_fix.sorted.bam
+        ${SAMTOOLS} fixmate -@ ${task.cpus} ${pair_id}_alignment_sorted.bam ${pair_id}_alignment_sorted_fix.bam
+        ${SAMTOOLS} sort -@ ${task.cpus} -m 3G ${pair_id}_alignment_sorted_fix.bam -o ${pair_id}_alignment_sorted_fix.sorted.bam
         rm ${pair_id}_alignment_sorted_fix.bam
         ${SAMTOOLS} rmdup -S ${pair_id}_alignment_sorted_fix.sorted.bam ${pair_id}_alignment_dedup.bam
         rm ${pair_id}_alignment_sorted_fix.sorted.bam
-        ${SAMTOOLS} view -@ ${threads} -h -o ${pair_id}_alignment_dedup.sam ${pair_id}_alignment_dedup.bam
+        ${SAMTOOLS} view -@ ${task.cpus} -h -o ${pair_id}_alignment_dedup.sam ${pair_id}_alignment_dedup.bam
         rm ${pair_id}_alignment_dedup.sam
         """
     else
@@ -107,13 +107,13 @@ process bwa_rm_contaminant_fq {
     path("${pair_id}.samtools.idxstats"), emit: host_rm_stats
     
     """
-    ${BWA} mem ${indexfiles[0]} ${reads[0]} ${reads[1]} -t ${threads} | \
-    ${SAMTOOLS} sort -@ ${threads} -m 4G -o ${pair_id}.host.sorted.bam
+    ${BWA} mem ${indexfiles[0]} ${reads[0]} ${reads[1]} -t ${task.cpus} | \
+    ${SAMTOOLS} sort -@ ${task.cpus} -m 4G -o ${pair_id}.host.sorted.bam
     ${SAMTOOLS} index ${pair_id}.host.sorted.bam && ${SAMTOOLS} idxstats ${pair_id}.host.sorted.bam > ${pair_id}.samtools.idxstats
     ${SAMTOOLS} view -h -f 12 -b ${pair_id}.host.sorted.bam -o ${pair_id}.host.sorted.removed.bam
-    ${SAMTOOLS} sort -n -@ ${threads} -m 3G ${pair_id}.host.sorted.removed.bam -o ${pair_id}.host.resorted.removed.bam
+    ${SAMTOOLS} sort -n -@ ${task.cpus} -m 3G ${pair_id}.host.sorted.removed.bam -o ${pair_id}.host.resorted.removed.bam
     ${SAMTOOLS}  \
-       fastq -@ ${threads} -c 6  \
+       fastq -@ ${task.cpus} -c 6  \
       ${pair_id}.host.resorted.removed.bam \
       -1 ${pair_id}.non.host.R1.fastq.gz \
       -2 ${pair_id}.non.host.R2.fastq.gz \
@@ -143,6 +143,6 @@ process HostRemovalStats {
         path("host.removal.stats"), emit: combo_host_rm_stats
 
     """
-    ${PYTHON3} $baseDir/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
+    ${PYTHON3} /opt/amrplusplus/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats
     """
 }
diff --git a/modules/Microbiome/kraken2.nf b/modules/Microbiome/kraken2.nf
index 9219438..e7c9565 100755
--- a/modules/Microbiome/kraken2.nf
+++ b/modules/Microbiome/kraken2.nf
@@ -78,7 +78,7 @@ process krakenresults {
         path("kraken_analytic_matrix.csv")
 
     """
-    ${PYTHON3} $baseDir/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
+    ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
     """
 }
 
diff --git a/modules/Microbiome/kraken_and_bracken.nf b/modules/Microbiome/kraken_and_bracken.nf
index f7f60a6..36a8466 100755
--- a/modules/Microbiome/kraken_and_bracken.nf
+++ b/modules/Microbiome/kraken_and_bracken.nf
@@ -104,7 +104,7 @@ process krakenresults {
 
 
     """
-    ${PYTHON3} $baseDir/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
+    ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
     """
 }
 
diff --git a/modules/Resistome/resistome.nf b/modules/Resistome/resistome.nf
index 3b987df..420f5ea 100755
--- a/modules/Resistome/resistome.nf
+++ b/modules/Resistome/resistome.nf
@@ -40,7 +40,7 @@ process build_dependencies {
     #mv rarefaction ../
     #cd ../
     #rm -rf rarefactionanalyzer
-    cp $baseDir/bin/rarefaction . 
+    cp /opt/amrplusplus/bin/rarefaction . 
 
 
     #git clone https://github.com/cdeanj/resistomeanalyzer.git
@@ -50,7 +50,7 @@ process build_dependencies {
     #mv resistome ../
     #cd ../
     #rm -rf resistomeanalyzer
-    cp $baseDir/bin/resistome .
+    cp /opt/amrplusplus/bin/resistome .
 
     git clone https://github.com/Isabella136/AmrPlusPlus_SNP.git
     chmod -R 777 AmrPlusPlus_SNP/
@@ -119,7 +119,7 @@ process resistomeresults {
         path("${prefix}_analytic_matrix.csv"), emit: snp_count_matrix, optional: true
 
     """
-    ${PYTHON3} $baseDir/bin/amr_long_to_wide.py -i ${resistomes} -o ${prefix}_analytic_matrix.csv
+    ${PYTHON3} /opt/amrplusplus/bin/amr_long_to_wide.py -i ${resistomes} -o ${prefix}_analytic_matrix.csv
     """
 }
 
@@ -189,7 +189,7 @@ process plotrarefaction {
     """
     mkdir data/
     mv *.tsv data/
-    python $baseDir/bin/rfplot.py --dir ./data --nd --s --sd .
+    python /opt/amrplusplus/bin/rfplot.py --dir ./data --nd --s --sd .
     """
 }
 
@@ -219,7 +219,7 @@ process runsnp {
         path("${sample_id}.${prefix}_SNPs${sample_id}/*")
 
     """
-    cp -r $baseDir/bin/AmrPlusPlus_SNP/* .
+    cp -r /opt/amrplusplus/bin/AmrPlusPlus_SNP/* .
 
     # change name to stay consistent with count matrix name, but only if the names don't match
     if [ "${bam}" != "${sample_id}.bam" ]; then
@@ -257,7 +257,7 @@ process snpresults {
 
     """
 
-    ${PYTHON3} $baseDir/bin/snp_long_to_wide.py -i ${snp_counts} -o SNPconfirmed_${prefix}_analytic_matrix.csv
+    ${PYTHON3} /opt/amrplusplus/bin/snp_long_to_wide.py -i ${snp_counts} -o SNPconfirmed_${prefix}_analytic_matrix.csv
 
     """
 }
diff --git a/modules/Trimming/trimmomatic.nf b/modules/Trimming/trimmomatic.nf
index db61c18..a26869f 100755
--- a/modules/Trimming/trimmomatic.nf
+++ b/modules/Trimming/trimmomatic.nf
@@ -41,7 +41,7 @@ process runqc {
     """
      ${TRIMMOMATIC} \
       PE \
-      -threads ${threads} \
+      -threads ${task.cpus} \
       ${reads[0]} ${reads[1]} ${sample_id}.1P.fastq.gz ${sample_id}.1U.fastq.gz ${sample_id}.2P.fastq.gz ${sample_id}.2U.fastq.gz \
       ILLUMINACLIP:${adapters}:2:30:10:3:TRUE \
       LEADING:${leading} \
@@ -73,6 +73,6 @@ process QCstats {
         path("trimmomatic.stats"), emit: combo_trim_stats
 
     """
-    ${PYTHON3} $baseDir/bin/trimmomatic_stats.py -i ${stats} -o trimmomatic.stats
+    ${PYTHON3} /opt/amrplusplus/bin/trimmomatic_stats.py -i ${stats} -o trimmomatic.stats
     """
 }

From e40d30a2738ff627776bcbc3496f7012d14414eb Mon Sep 17 00:00:00 2001
From: Samuel Hornstein <samh@verily.com>
Date: Fri, 15 May 2026 10:23:44 -0700
Subject: [PATCH 5/7] chore: rename container image from amrplusplus-workbench
 to flores-workbench

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 nextflow.config              | 4 ++--
 wb/config/gcp.env            | 2 +-
 wb/config/gcp.env.template   | 2 +-
 wb/config/local.env          | 4 ++--
 wb/config/local.env.template | 4 ++--
 wb/config/wb.env             | 2 +-
 wb/config/wb.env.template    | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 785fa3d..6d66c20 100755
--- a/nextflow.config
+++ b/nextflow.config
@@ -75,7 +75,7 @@ profiles {
     'google-batch' {
     includeConfig "config/google_batch.config"
     process.executor = 'google-batch'
-    process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/amrplusplus-workbench:latest"
+    process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/flores-workbench:latest"
     process.errorStrategy = { task.exitStatus==50001 ? 'retry' : 'terminate' }
     process.maxRetries = 2
 
@@ -99,7 +99,7 @@ profiles {
   workbench {
     includeConfig "config/google_batch.config"
     process.executor = 'google-batch'
-    process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/amrplusplus-workbench:latest"
+    process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/flores-workbench:latest"
     process.errorStrategy = { task.exitStatus==50001 ? 'retry' : 'terminate' }
     process.maxRetries = 5
 
diff --git a/wb/config/gcp.env b/wb/config/gcp.env
index 75756fc..ff8dbd4 100644
--- a/wb/config/gcp.env
+++ b/wb/config/gcp.env
@@ -32,7 +32,7 @@ export GOOGLE_SERVICE_ACCOUNT_NAME=nextflow-runner
 export GOOGLE_SERVICE_ACCOUNT_EMAIL="${GOOGLE_SERVICE_ACCOUNT_NAME}@${GOOGLE_CLOUD_PROJECT}.iam.gserviceaccount.com"
 
 # Docker image configuration (auto-generated paths)
-IMAGE_NAME="amrplusplus-workbench"
+IMAGE_NAME="flores-workbench"
 IMAGE_TAG="latest"
 REGISTRY_PATH="us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}"
 
diff --git a/wb/config/gcp.env.template b/wb/config/gcp.env.template
index 79032ee..609e78c 100644
--- a/wb/config/gcp.env.template
+++ b/wb/config/gcp.env.template
@@ -32,7 +32,7 @@ export GOOGLE_SERVICE_ACCOUNT_NAME=nextflow-runner
 export GOOGLE_SERVICE_ACCOUNT_EMAIL="${GOOGLE_SERVICE_ACCOUNT_NAME}@${GOOGLE_CLOUD_PROJECT}.iam.gserviceaccount.com"
 
 # Docker image configuration (auto-generated paths)
-IMAGE_NAME="amrplusplus-workbench"
+IMAGE_NAME="flores-workbench"
 IMAGE_TAG="latest"
 REGISTRY_PATH="us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}"
 
diff --git a/wb/config/local.env b/wb/config/local.env
index ecd7915..a1df9f3 100644
--- a/wb/config/local.env
+++ b/wb/config/local.env
@@ -7,8 +7,8 @@
 
 # Docker image configuration
 # Replace <YOUR_DOCKERHUB_USERNAME> with your Docker Hub username
-# Example: "johndoe/amrplusplus-workbench"
-IMAGE_NAME="passdan/amrplusplus-workbench"
+# Example: "johndoe/flores-workbench"
+IMAGE_NAME="passdan/flores-workbench"
 
 ###############################################################################
 # AUTOMATIC CONFIGURATION - DO NOT MODIFY
diff --git a/wb/config/local.env.template b/wb/config/local.env.template
index cd78e5b..cba30b5 100644
--- a/wb/config/local.env.template
+++ b/wb/config/local.env.template
@@ -7,8 +7,8 @@
 
 # Docker image configuration
 # Replace <YOUR_DOCKERHUB_USERNAME> with your Docker Hub username
-# Example: "johndoe/amrplusplus-workbench"
-IMAGE_NAME="<YOUR_DOCKERHUB_USERNAME>/amrplusplus-workbench"
+# Example: "johndoe/flores-workbench"
+IMAGE_NAME="<YOUR_DOCKERHUB_USERNAME>/flores-workbench"
 
 ###############################################################################
 # AUTOMATIC CONFIGURATION - DO NOT MODIFY
diff --git a/wb/config/wb.env b/wb/config/wb.env
index 69c153b..ee4fb7b 100644
--- a/wb/config/wb.env
+++ b/wb/config/wb.env
@@ -35,7 +35,7 @@ export GOOGLE_SERVICE_ACCOUNT_EMAIL=$(wb auth status 2>&1 | grep "Service accoun
 export GOOGLE_SERVICE_ACCOUNT_NAME=$(echo "${GOOGLE_SERVICE_ACCOUNT_EMAIL}" | cut -d'@' -f1)
 
 # Docker image configuration (auto-generated paths)
-IMAGE_NAME="amrplusplus-workbench"
+IMAGE_NAME="flores-workbench"
 IMAGE_TAG="latest"
 REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}"
 
diff --git a/wb/config/wb.env.template b/wb/config/wb.env.template
index 4c807a1..bbea4c7 100644
--- a/wb/config/wb.env.template
+++ b/wb/config/wb.env.template
@@ -39,7 +39,7 @@ export GOOGLE_SERVICE_ACCOUNT_EMAIL=$(wb auth status 2>&1 | grep "Service accoun
 export GOOGLE_SERVICE_ACCOUNT_NAME=$(echo "${GOOGLE_SERVICE_ACCOUNT_EMAIL}" | cut -d'@' -f1)
 
 # Docker image configuration (auto-generated paths)
-IMAGE_NAME="amrplusplus-workbench"
+IMAGE_NAME="flores-workbench"
 IMAGE_TAG="latest"
 REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}"
 

From 740030934906c2717ded741876598d77c3392b5e Mon Sep 17 00:00:00 2001
From: Sam Hornstein <samhornstein@gmail.com>
Date: Fri, 15 May 2026 22:18:44 +0000
Subject: [PATCH 6/7] fix: resolve workbench pipeline failures and add
 pre-built host index

- Add resource overrides for bowtie2_index and bwa index processes
  (n2-highmem-8, 64GB) to prevent OOM kills on full genome builds
- Fix multiqc output naming for newer multiqc versions by adding
  --outdir and --filename flags
- Add errorStrategy 'ignore' to runbracken for empty taxonomic levels
- Add Domain ('D') taxonomic level to kraken2_long_to_wide_update.py
- Use Nextflow-uploaded bin/ scripts instead of container-baked paths
  in krakenresults process
- Fix GCS glob handling in host index loading by removing Paths.get()
- Add pre-built GRCh38 bowtie2 host index to skip 50-min build step

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 bin/kraken2_long_to_wide_update.py       | 34 +++++++++++++-----------
 config/google_batch.config               | 12 +++++++++
 modules/Fastqc/fastqc.nf                 |  5 ++--
 modules/Microbiome/kraken_and_bracken.nf |  5 ++--
 params_google_batch.config               |  4 +--
 subworkflows/fastq_host_removal.nf       |  4 +--
 6 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/bin/kraken2_long_to_wide_update.py b/bin/kraken2_long_to_wide_update.py
index 23a15e7..afcee85 100755
--- a/bin/kraken2_long_to_wide_update.py
+++ b/bin/kraken2_long_to_wide_update.py
@@ -13,28 +13,30 @@
     'R1':0,
     'R2':1,
     'R3':2,
-    'K': 3,
-    'P': 4,
-    'C': 5,
-    'O': 6,
-    'F': 7,
-    'G': 8,
-    'S': 9,
-    'U': 10
+    'D': 3,
+    'K': 4,
+    'P': 5,
+    'C': 6,
+    'O': 7,
+    'F': 8,
+    'G': 9,
+    'S': 10,
+    'U': 11
 }
 
 taxa_level_names = {
     0: 'Root1',
     1: 'Root2',
     2: 'Root3',
-    3: 'Kingdom',
-    4: 'Phylum',
-    5: 'Class',
-    6: 'Order',
-    7: 'Family',
-    8: 'Genus',
-    9: 'Species',
-    10: 'Unclassified'
+    3: 'Domain',
+    4: 'Kingdom',
+    5: 'Phylum',
+    6: 'Class',
+    7: 'Order',
+    8: 'Family',
+    9: 'Genus',
+    10: 'Species',
+    11: 'Unclassified'
 }
 
 
diff --git a/config/google_batch.config b/config/google_batch.config
index 0457262..335876b 100644
--- a/config/google_batch.config
+++ b/config/google_batch.config
@@ -23,6 +23,18 @@ process {
         containerOptions = '--env _JAVA_OPTIONS="-Xmx60g"'
     }
 
+    withName: 'bowtie2_index' {
+        cpus = 8
+        memory = '64 GB'
+        machineType = 'n2-highmem-8'
+    }
+
+    withName: 'index' {
+        cpus = 8
+        memory = '64 GB'
+        machineType = 'n2-highmem-8'
+    }
+
     withName: 'bowtie2_align' {
         cpus = 32
         memory = '128 GB'
diff --git a/modules/Fastqc/fastqc.nf b/modules/Fastqc/fastqc.nf
index bf3c8d0..893d592 100755
--- a/modules/Fastqc/fastqc.nf
+++ b/modules/Fastqc/fastqc.nf
@@ -51,7 +51,8 @@ process multiqc {
     script:
     """
     cp $config/* .
-    multiqc -v data* --interactive -f --cl-config "max_table_rows: 3000"
-    mv multiqc_data/multiqc_general_stats.txt .
+    multiqc -v data* --interactive -f --cl-config "max_table_rows: 3000" --outdir multiqc_data --filename multiqc_report.html
+    mv multiqc_data/multiqc_report_data/multiqc_general_stats.txt .
+    mv multiqc_data/multiqc_report.html .
     """
 }
diff --git a/modules/Microbiome/kraken_and_bracken.nf b/modules/Microbiome/kraken_and_bracken.nf
index 36a8466..6c73264 100755
--- a/modules/Microbiome/kraken_and_bracken.nf
+++ b/modules/Microbiome/kraken_and_bracken.nf
@@ -104,13 +104,14 @@ process krakenresults {
 
 
     """
-    ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
+    ${PYTHON3} \$HOME/.nextflow-bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
     """
 }
 
 process runbracken {
     label "microbiome"
-    
+    errorStrategy 'ignore'
+
     input:
        tuple val(sample_id), path(kraken_report), val(level)
        path(krakendb)
diff --git a/params_google_batch.config b/params_google_batch.config
index ce1989d..da16807 100644
--- a/params_google_batch.config
+++ b/params_google_batch.config
@@ -13,7 +13,7 @@ params {
     // -----------------------------------------------------------------
     //  Input Data
     // -----------------------------------------------------------------
-    reads = "gs://${gcs_bucket}/rawdata-wb-farms/S358_MiSeq_BHWNTNDRX5/fastq/C1*{1,2}.f*q.gz"
+    reads = "gs://${gcs_bucket}/data/raw/*_R{1,2}.fastq.gz"
     bam_files = null
     split = ""
 
@@ -21,7 +21,7 @@ params {
     //  Reference Databases
     // -----------------------------------------------------------------
     host = "gs://${gcs_ref_bucket}/grch38_1kgmaj.fa"
-    host_index = null
+    host_index = "gs://${gcs_ref_bucket}/grch38_1kgmaj{.fa,*.bt2}"
 
     kraken_db = "gs://${gcs_ref_bucket}/minikraken_8GB_20200312"
 
diff --git a/subworkflows/fastq_host_removal.nf b/subworkflows/fastq_host_removal.nf
index c93ecc0..cfaacd6 100755
--- a/subworkflows/fastq_host_removal.nf
+++ b/subworkflows/fastq_host_removal.nf
@@ -19,9 +19,7 @@ workflow FASTQ_RM_HOST_WF {
             reference_index_files = bowtie2_index.out
         } else {
             reference_index_files = Channel
-               .fromPath(Paths.get(params.host_index))
-               .map { file(it.toString()) }
-               .filter { file(it).exists() }
+               .fromPath(params.host_index)
                .toList()
                .map { files ->
                    if (files.size() < 6) {

From ad7d59f6774c6c552cb27ccff5735bba1a18d67e Mon Sep 17 00:00:00 2001
From: Sam Hornstein <samhornstein@gmail.com>
Date: Fri, 15 May 2026 22:38:17 +0000
Subject: [PATCH 7/7] fix: address PR review feedback

- Use consistent container path for krakenresults script
  (/opt/amrplusplus/bin/ instead of $HOME/.nextflow-bin/)
- Gitignore wb.env and gcp.env (workspace-specific config)
- Add setup instructions to wb.env.template
- Add helpful error message when env file is missing
- Restrict runbracken errorStrategy 'ignore' to exit code 1 only; any
  other exit status now terminates the run

Note: container image must be rebuilt to include the Domain 'D'
fix in kraken2_long_to_wide_update.py for krakenresults to work.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .gitignore                               |  3 ++
 modules/Microbiome/kraken_and_bracken.nf |  4 +--
 wb/config/gcp.env                        | 41 ----------------------
 wb/config/wb.env                         | 44 ------------------------
 wb/config/wb.env.template                |  6 ++++
 wb/run.sh                                |  1 +
 6 files changed, 12 insertions(+), 87 deletions(-)
 delete mode 100644 wb/config/gcp.env
 delete mode 100644 wb/config/wb.env

diff --git a/.gitignore b/.gitignore
index 6ca4a88..8551b05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,6 @@ envs/amrplusplus-update.sif
 logs/
 .nextflow*
 nextflow-24.10.9-dist
+trace-*.txt
+wb/config/wb.env
+wb/config/gcp.env
diff --git a/modules/Microbiome/kraken_and_bracken.nf b/modules/Microbiome/kraken_and_bracken.nf
index 6c73264..bd511e2 100755
--- a/modules/Microbiome/kraken_and_bracken.nf
+++ b/modules/Microbiome/kraken_and_bracken.nf
@@ -104,13 +104,13 @@ process krakenresults {
 
 
     """
-    ${PYTHON3} \$HOME/.nextflow-bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
+    ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv
     """
 }
 
 process runbracken {
     label "microbiome"
-    errorStrategy 'ignore'
+    errorStrategy { task.exitStatus == 1 ? 'ignore' : 'terminate' }
 
     input:
        tuple val(sample_id), path(kraken_report), val(level)
diff --git a/wb/config/gcp.env b/wb/config/gcp.env
deleted file mode 100644
index ff8dbd4..0000000
--- a/wb/config/gcp.env
+++ /dev/null
@@ -1,41 +0,0 @@
-# GCP environment configuration
-# This is used when running Nextflow on Google Batch with local orchestration
-# Nextflow runs on your local machine, jobs execute on Google Batch
-
-###############################################################################
-# USER CONFIGURATION - UPDATE THESE VALUES
-###############################################################################
-
-# GCS bucket for storing pipeline data and results
-# Replace <YOUR_BUCKET_NAME> with your GCS bucket name (without gs:// prefix)
-# Example: "my-nextflow-data"
-export GCS_BUCKET=mod-wb-mighty-tangerine-1678
-
-# GCS bucket location/region
-# Common values: us-central1, us-east1, europe-west1
-export GCS_BUCKET_LOCATION=europe-west2
-
-# Google Artifact Registry repository name
-# Replace <YOUR_ARTIFACT_REPO> with your artifact registry repository name
-# Example: "nextflow-containers"
-export GOOGLE_ARTIFACT_REPO=nextflow-containers
-
-###############################################################################
-# AUTOMATIC CONFIGURATION - DO NOT MODIFY
-###############################################################################
-
-# Google Cloud project (auto-detected from gcloud CLI)
-export GOOGLE_CLOUD_PROJECT=$(gcloud config get project)
-
-# Service account configuration (auto-generated)
-export GOOGLE_SERVICE_ACCOUNT_NAME=nextflow-runner
-export GOOGLE_SERVICE_ACCOUNT_EMAIL="${GOOGLE_SERVICE_ACCOUNT_NAME}@${GOOGLE_CLOUD_PROJECT}.iam.gserviceaccount.com"
-
-# Docker image configuration (auto-generated paths)
-IMAGE_NAME="flores-workbench"
-IMAGE_TAG="latest"
-REGISTRY_PATH="us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}"
-
-# Nextflow profile and config
-NEXTFLOW_PROFILE="google_batch"
-NEXTFLOW_CONFIG="params_google-batch.config"
diff --git a/wb/config/wb.env b/wb/config/wb.env
deleted file mode 100644
index ee4fb7b..0000000
--- a/wb/config/wb.env
+++ /dev/null
@@ -1,44 +0,0 @@
-# Workbench environment configuration
-# This is used when running Nextflow on Google Batch with Workbench orchestration
-# Both Nextflow orchestration and job execution happen in Workbench/Google Cloud
-
-###############################################################################
-# USER CONFIGURATION - UPDATE THESE VALUES
-###############################################################################
-
-# GCS bucket resource ID (created via Workbench)
-# Replace <YOUR_BUCKET_ID> with your Workbench GCS bucket resource ID
-# Example: "nf-output" or "my-pipeline-data"
-# Note: Use the resource ID, not the full GCS bucket name
-export GCS_BUCKET=nf-output-wb-cagey-coconut-8353
-export GCS_REF_BUCKET=reference-genomes-wb-cagey-coconut-8353
-
-# GCS bucket location/region
-# Common values: us-central1, us-east1, europe-west1
-export GCS_BUCKET_LOCATION=us-central1
-
-# Google Artifact Registry repository name
-# Replace <YOUR_ARTIFACT_REPO> with your artifact registry repository name
-# Example: "nextflow-containers"
-export GOOGLE_ARTIFACT_REPO=nextflow-containers
-
-###############################################################################
-# AUTOMATIC CONFIGURATION - DO NOT MODIFY
-###############################################################################
-
-# Google Cloud project (auto-detected from Workbench workspace)
-export WORKBENCH_GOOGLE_CLOUD_PROJECT=$(wb status 2>/dev/null | grep "Google project" | awk -F': ' '{print $2}' | xargs)
-export GOOGLE_CLOUD_PROJECT="${WORKBENCH_GOOGLE_CLOUD_PROJECT}"
-
-# Service account configuration (Workbench Pet Service Account - auto-detected)
-export GOOGLE_SERVICE_ACCOUNT_EMAIL=$(wb auth status 2>&1 | grep "Service account email" | awk -F': ' '{print $2}' | xargs)
-export GOOGLE_SERVICE_ACCOUNT_NAME=$(echo "${GOOGLE_SERVICE_ACCOUNT_EMAIL}" | cut -d'@' -f1)
-
-# Docker image configuration (auto-generated paths)
-IMAGE_NAME="flores-workbench"
-IMAGE_TAG="latest"
-REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}"
-
-# Nextflow profile and config
-NEXTFLOW_PROFILE="workbench"
-NEXTFLOW_CONFIG="params_google_batch.config"
diff --git a/wb/config/wb.env.template b/wb/config/wb.env.template
index bbea4c7..ab9011a 100644
--- a/wb/config/wb.env.template
+++ b/wb/config/wb.env.template
@@ -1,6 +1,12 @@
 # Workbench environment configuration
 # This is used when running Nextflow on Google Batch with Workbench orchestration
 # Both Nextflow orchestration and job execution happen in Workbench/Google Cloud
+#
+# Setup:
+#   cp wb/config/wb.env.template wb/config/wb.env
+#   # Edit wb/config/wb.env and fill in the values below
+#
+# Note: wb.env is gitignored — each user/workspace needs its own copy.
 
 ###############################################################################
 # USER CONFIGURATION - UPDATE THESE VALUES
diff --git a/wb/run.sh b/wb/run.sh
index 27ee8bf..fbf3e1e 100755
--- a/wb/run.sh
+++ b/wb/run.sh
@@ -46,6 +46,7 @@ fi
 CONFIG_FILE="${SCRIPT_DIR}/config/${ENV}.env"
 if [[ ! -f "$CONFIG_FILE" ]]; then
     echo "Error: Configuration file not found: $CONFIG_FILE"
+    echo "Create it from the template: cp ${SCRIPT_DIR}/config/${ENV}.env.template ${CONFIG_FILE}"
     exit 1
 fi