From 444a508f818a0a8aa9ceb443aadb667a58b1dbfc Mon Sep 17 00:00:00 2001 From: Samuel Hornstein Date: Wed, 13 May 2026 13:00:31 -0700 Subject: [PATCH 1/7] fix: enable workbench execution with correct profile and cloud params Fixes several issues preventing FloRes from running on Verily Workbench: - wb/run.sh now passes -profile and -c flags to nextflow - Adds params_google_batch.config with all gs:// paths for cloud execution - Restores params.config to local-only defaults (no hardcoded google-batch executor) - Moves process resource declarations into config/google_batch.config - Parameterizes hardcoded bucket names with GCS_REF_BUCKET env var --- config/google_batch.config | 34 ++++++++++---- nextflow.config | 4 +- params.config | 63 +++----------------------- params_google_batch.config | 92 ++++++++++++++++++++++++++++++++++++++ wb/config/wb.env | 2 +- wb/config/wb.env.template | 7 ++- wb/run.sh | 3 +- 7 files changed, 134 insertions(+), 71 deletions(-) create mode 100644 params_google_batch.config diff --git a/config/google_batch.config b/config/google_batch.config index e51154e..0457262 100644 --- a/config/google_batch.config +++ b/config/google_batch.config @@ -10,30 +10,46 @@ */ process { - // Default for all processes cpus = 4 memory = '16 GB' machineType = 'n2-standard-4' cache = 'lenient' - executor = 'google-batch' withName: 'runqc' { cpus = 16 - memory = "64.GB" - machineType = "n2-standard-16" + memory = '64 GB' + machineType = 'n2-standard-16' containerOptions = '--env _JAVA_OPTIONS="-Xmx60g"' } - withName: 'bwa_align' { + withName: 'bowtie2_align' { + cpus = 32 + memory = '128 GB' + machineType = 'n2-standard-32' + } + + withName: 'bowtie2_rm_contaminant_fq' { cpus = 32 - memory = "256.GB" - machineType = "n2-highmem-32" + memory = '256 GB' + machineType = 'n2-highmem-32' + } + + withName: 'bwa_align' { + cpus = 16 + memory = '128 GB' + machineType = 'n2-highmem-32' } withName: 'runkraken' { cpus = 16 - memory = "256.GB" - machineType = "n2-highmem-32" + memory = '256 GB' + machineType = 'n2-highmem-32' + } + + withName: 'runkrakenInterleaved' { + cpus = 16 + memory = '256 GB' + machineType = 'n2-highmem-32' } } diff --git a/nextflow.config b/nextflow.config index bb06a4d..785fa3d 100755 --- a/nextflow.config +++ b/nextflow.config @@ -80,7 +80,7 @@ profiles { process.maxRetries = 2 workDir = "gs://${GCS_BUCKET}/scratch" - refDir = "gs://referencegenomes-wb-mighty-tangerine-1678" + refDir = "gs://${GCS_REF_BUCKET}" google.region = "${GCS_BUCKET_LOCATION}" google.project = "$GOOGLE_CLOUD_PROJECT" @@ -104,7 +104,7 @@ profiles { process.maxRetries = 5 workDir = "gs://${GCS_BUCKET}/scratch" - params.refDir = "gs://referencegenomes-wb-mighty-tangerine-1678" + params.refDir = "gs://${GCS_REF_BUCKET}" google.region = "${GCS_BUCKET_LOCATION}" google.project = "$GOOGLE_CLOUD_PROJECT" diff --git a/params.config b/params.config index 9c8aa37..ad563bf 100755 --- a/params.config +++ b/params.config @@ -14,10 +14,10 @@ params { // ----------------------------------------------------------------- /* Location of forward and reverse read pairs */ - reads = "gs://{GCS_BUCKET}/rawdata-wb-farms/S358_MiSeq_BHWNTNDRX5/fastq/C1*{1,2}.f*q.gz" + reads = "${baseDir}/data/raw/*_R{1,2}.fastq.gz" /* Output directory */ - output = "gs://${GCS_BUCKET}/results" + output = "test_results" // ----------------------------------------------------------------- // Reference Databases @@ -28,13 +28,13 @@ params { split = "" /* Location of reference/host genome */ - host = "gs://${GCS_REF_BUCKET}/grch38_1kgmaj.fa" + host = "${baseDir}/data/host/chr21.fasta.gz" /* Optionally, you can specify the location of the host index files created with bwa with the path and wildcard (*): */ - host_index = null - - /* Kraken database location, default is "null" and will download minikraken db */ - kraken_db = "gs://${GCS_REF_BUCKET}/minikraken_8GB_20200312" + host_index = "${baseDir}/data/host/chr21.fasta.gz*" + + /* Kraken database location, default is "null" and will download minikraken db */ + kraken_db = null /* Location of amr index files with wildcard */ /* If you want the bowtie indexes built, use the bareword "null" */ @@ -105,55 +105,6 @@ params { dada2_db = "$baseDir/data/qiime/gg-13-8-99-515-806-nb-classifier.qza" } - // ----------------------------------------------------------------- - // default step resource requirements - // ----------------------------------------------------------------- - - -process { - // Stage resource usages - // See config for singularity install details - cache = 'lenient' - executor = 'google-batch' - - withName: 'runqc' { - cpus = 16 - memory = "64.GB" - machineType = "n2-standard-16" - containerOptions = '--env _JAVA_OPTIONS="-Xmx60g"' - } - - withName: 'bowtie2_align' { - cpus = 32 - memory = "128.GB" - } - - withName: 'bowtie2_rm_contaminant_fq' { - cpus = 32 - memory = "256.GB" - machineType = "n2-highmem-32" - } - - withName: 'bwa_align' { - cpus = 16 - memory = "128.GB" - machineType = "n2-highmem-32" - } - - withName: 'runkraken' { - cpus = 16 - memory = "256.GB" - machineType = "n2-highmem-32" - } - - withName: 'runkrakenInterleaved' { - cpus = 16 - memory = "256.GB" - machineType = "n2-highmem-32" - } -} - - // The location of each dependency binary needs to be specified here. // The examples listed below are assuming the tools are already in the $PATH, however, // the absolute path to each tool can be entered individually. diff --git a/params_google_batch.config b/params_google_batch.config new file mode 100644 index 0000000..ce1989d --- /dev/null +++ b/params_google_batch.config @@ -0,0 +1,92 @@ +/* + * Google Batch Parameter Configuration + * All file paths use gs:// for cloud execution via Google Batch. + * Uses GCS_BUCKET and GCS_REF_BUCKET environment variables. + */ + +def gcs_bucket = System.getenv("GCS_BUCKET") ?: "nf-files" +def gcs_ref_bucket = System.getenv("GCS_REF_BUCKET") ?: gcs_bucket + +params { + help = false + + // ----------------------------------------------------------------- + // Input Data + // ----------------------------------------------------------------- + reads = "gs://${gcs_bucket}/rawdata-wb-farms/S358_MiSeq_BHWNTNDRX5/fastq/C1*{1,2}.f*q.gz" + bam_files = null + split = "" + + // ----------------------------------------------------------------- + // Reference Databases + // ----------------------------------------------------------------- + host = "gs://${gcs_ref_bucket}/grch38_1kgmaj.fa" + host_index = null + + kraken_db = "gs://${gcs_ref_bucket}/minikraken_8GB_20200312" + + amr_index = null + amr = "gs://${gcs_bucket}/data/amr/megares_database_v3.fasta" + annotation = "gs://${gcs_bucket}/data/amr/megares_annotations_v3.00.csv" + + // ----------------------------------------------------------------- + // Output + // ----------------------------------------------------------------- + output = "gs://${gcs_bucket}/results" + + // ----------------------------------------------------------------- + // Pipeline Logic & Analysis Toggles + // ----------------------------------------------------------------- + snp = "Y" + deduped = "N" + prefix = "AMR" + threads = 8 + + // ----------------------------------------------------------------- + // Trimming Parameters + // ----------------------------------------------------------------- + adapters = "gs://${gcs_bucket}/data/adapters/nextera.fa" + leading = 3 + trailing = 3 + slidingwindow = "4:15" + minlen = 36 + + // ----------------------------------------------------------------- + // Resistome Analysis Parameters + // ----------------------------------------------------------------- + threshold = 80 + min = 5 + max = 100 + skip = 5 + samples = 1 + + // ----------------------------------------------------------------- + // Other Tools + // ----------------------------------------------------------------- + multiqc = "gs://${gcs_bucket}/data/multiqc" + + p_trim_left_f = 25 + p_trim_left_r = 26 + p_trunc_len_f = 225 + p_trunc_len_r = 220 + + taxlevel = "R1,R2,R3,K,P,C,O,F,G,S" + + dada2_db = "gs://${gcs_bucket}/data/qiime/gg-13-8-99-515-806-nb-classifier.qza" +} + +env { + JAVA = "java" + TRIMMOMATIC = "trimmomatic" + FASTP = "fastp" + PYTHON3 = "python3" + BWA = "bwa" + BOWTIE2 = "bowtie2" + SAMTOOLS = "samtools" + BEDTOOLS = "bedtools" + RESISTOME = "resistome" + RAREFACTION = "rarefaction" + SNPFINDER = "snpfinder" + KRAKEN2 = "kraken2" + QIIME = "qiime" +} diff --git a/wb/config/wb.env b/wb/config/wb.env index c4e4ea6..53b69aa 100644 --- a/wb/config/wb.env +++ b/wb/config/wb.env @@ -41,4 +41,4 @@ REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GO # Nextflow profile and config NEXTFLOW_PROFILE="workbench" -NEXTFLOW_CONFIG="params.config" +NEXTFLOW_CONFIG="params_google_batch.config" diff --git a/wb/config/wb.env.template b/wb/config/wb.env.template index 3503d55..4c807a1 100644 --- a/wb/config/wb.env.template +++ b/wb/config/wb.env.template @@ -12,6 +12,11 @@ # Note: Use the resource ID, not the full GCS bucket name export GCS_BUCKET= +# GCS reference data bucket +# Replace with the bucket containing reference genomes +# Example: "referencegenomes-wb-my-workspace-1234" +export GCS_REF_BUCKET= + # GCS bucket location/region # Common values: us-central1, us-east1, europe-west1 export GCS_BUCKET_LOCATION=us-central1 @@ -40,4 +45,4 @@ REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GO # Nextflow profile and config NEXTFLOW_PROFILE="workbench" -NEXTFLOW_CONFIG="params_google-batch.config" +NEXTFLOW_CONFIG="params_google_batch.config" diff --git a/wb/run.sh b/wb/run.sh index 67580fe..27ee8bf 100755 --- a/wb/run.sh +++ b/wb/run.sh @@ -132,5 +132,4 @@ fi now=$(date +"%Y-%m-%d--%H-%M") # Run nextflow with Google Batch profile -#nextflow -c "${NEXTFLOW_CONFIG}" run main_AMR++.nf --pipeline "standard_AMR_wKraken_and_Bracken" -profile "${NEXTFLOW_PROFILE}" -with-trace "trace-${now}.txt" -resume -bg -nextflow run main_AMR++.nf --pipeline "standard_AMR_wKraken_and_Bracken" -with-trace "trace-${now}.txt" +nextflow run main_AMR++.nf -profile "${NEXTFLOW_PROFILE}" -c "${NEXTFLOW_CONFIG}" --pipeline "standard_AMR_wKraken_and_Bracken" -with-trace "trace-${now}.txt" From 4abcc98175e86e94b1da34e14b35e5ca31984e9a Mon Sep 17 00:00:00 2001 From: Samuel Hornstein Date: Wed, 13 May 2026 13:04:51 -0700 Subject: [PATCH 2/7] docs: add Workbench quick start guide --- docs/workbench.md | 140 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 docs/workbench.md diff --git a/docs/workbench.md b/docs/workbench.md new file mode 100644 index 0000000..59fe718 --- /dev/null +++ b/docs/workbench.md @@ -0,0 +1,140 @@ +# FloRes on Verily Workbench + +**Prerequisites**: +- You must create a Workbench workspace where you have **ADMIN** permissions +- All setup and execution must be done within this workspace + +## Dependencies + +- **Verily Workbench CLI** (`wb`) - Workbench command-line tool +- **Google Cloud SDK** (`gcloud`) - GCP command-line tool +- **Docker** - For building and pushing container images (must be running) +- **Nextflow v24** - Workflow orchestration (installed in Workbench app) + - **Note**: v25 has breaking changes and is not compatible with this pipeline + +## Quick Start: Workbench Orchestration with Google Batch + +This guide walks through setting up and running FloRes with Workbench orchestration and Google Batch compute. The setup is split between local commands (for infrastructure) and Workbench app commands (for execution). + +### Step 1: Create Workspace and App + +Create a new workspace and app in the Workbench UI (or use the CLI if preferred). + +### Step 2: Local Setup + +Run these commands on your **local machine**: + +```bash +# Set your active workspace (replace with your workspace ID) +wb workspace set --id=your-workspace-id + +# Copy the Workbench environment template +cp wb/config/wb.env.template wb/config/wb.env +``` + +Edit `wb/config/wb.env` and set the user-defined variables: +- `GCS_BUCKET`: Your Workbench GCS bucket resource ID (e.g., `nf-output`) +- `GCS_REF_BUCKET`: Bucket containing reference genomes (e.g., `referencegenomes-wb-my-workspace-1234`) +- `GCS_BUCKET_LOCATION`: Region (default: `us-central1`) +- `GOOGLE_ARTIFACT_REPO`: Your artifact registry repo (e.g., `nextflow-containers`) + +**Note**: Project IDs, service accounts, and registry paths are automatically determined from your `gcloud` and `wb` CLI configurations. + +Then run: + +```bash +# Set up infrastructure (creates buckets, service accounts, etc.) +./wb/setup_infra.sh wb + +# Upload input data and reference databases to GCS +./wb/upload_data.sh wb + +# Build Docker image and push to Artifact Registry +# NOTE: Docker must be running before executing this command +./wb/build.sh --env wb --push +``` + +### Step 3: Workbench App Setup + +Open your Workbench app, launch the Terminal, and run: + +```bash +# Clone the repository +cd repos/ && git clone https://github.com/passdan/FloRes.git && cd FloRes/ + +# Copy the environment template +cp wb/config/wb.env.template wb/config/wb.env +``` + +Now copy your local `wb/config/wb.env` configuration into the Workbench app. + +### Step 4: Run the Pipeline + +```bash +./wb/run.sh --env wb +``` + +Results will be stored in your configured GCS bucket. + +**Known Issues**: +- The `gcloud storage cp` command may not correctly resolve Workbench resource names to full `gs://` paths when running `upload_data.sh` or `run.sh`. If you encounter path resolution issues, manually specify the full GCS bucket path in your `wb.env` configuration. + +--- + +## Alternative: Quick Demo in Workbench JupyterLab + +For a simple demonstration without Google Batch (both orchestration and execution running in the same Workbench app): + +Create a new Workbench workspace and add this git repository in the **Apps** tab. + +Create a JupyterLab app instance, launch it, and open the terminal: + +```bash +# Initialize conda +conda init +source ~/.bashrc + +# Navigate to the repository +cd repos/FloRes + +# Create and activate the conda environment +conda env create -f envs/AMR++_env.yaml +conda activate AMR++_env + +# Verify Nextflow version 24 is installed +nextflow -v + +# Run the test pipeline (takes ~5 minutes) +nextflow run main_AMR++.nf +``` + +Expected output: results in `~/repos/FloRes/test_results` + +--- + +## Configuration + +### Resource Scaling on Google Batch + +Google Batch does NOT automatically scale machine types based on CPU/memory requests. Resource scaling is configured in `config/google_batch.config`. + +Each process that needs more than default resources must explicitly specify a matching `machineType`. Current resource allocations: + +| Process | CPUs | Memory | Machine Type | +|---------|------|--------|-------------| +| Default | 4 | 16 GB | n2-standard-4 | +| runqc | 16 | 64 GB | n2-standard-16 | +| bowtie2_align | 32 | 128 GB | n2-standard-32 | +| bowtie2_rm_contaminant_fq | 32 | 256 GB | n2-highmem-32 | +| bwa_align | 16 | 128 GB | n2-highmem-32 | +| runkraken | 16 | 256 GB | n2-highmem-32 | +| runkrakenInterleaved | 16 | 256 GB | n2-highmem-32 | + +### Supporting Environments + +**Local** (testing): `./wb/run.sh --env local` +- Requires Docker and Conda + +**GCP** (debugging): `./wb/run.sh --env gcp` +- For debugging Google Batch jobs with visible logs +- Requires `gcloud` CLI and Docker From 797f0747f53e35297606e69f2778a228686a9167 Mon Sep 17 00:00:00 2001 From: Samuel Hornstein Date: Wed, 13 May 2026 14:36:45 -0700 Subject: [PATCH 3/7] chore: update wb.env to current workspace buckets --- wb/config/wb.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wb/config/wb.env b/wb/config/wb.env index 53b69aa..69c153b 100644 --- a/wb/config/wb.env +++ b/wb/config/wb.env @@ -10,12 +10,12 @@ # Replace with your Workbench GCS bucket resource ID # Example: "nf-output" or "my-pipeline-data" # Note: Use the resource ID, not the full GCS bucket name -export GCS_BUCKET=wb-mighty-tangerine-1678 -export GCS_REF_BUCKET=referencegenomes-wb-mighty-tangerine-1678 +export GCS_BUCKET=nf-output-wb-cagey-coconut-8353 +export GCS_REF_BUCKET=reference-genomes-wb-cagey-coconut-8353 # GCS bucket location/region # Common values: us-central1, us-east1, europe-west1 -export GCS_BUCKET_LOCATION=europe-west2 +export GCS_BUCKET_LOCATION=us-central1 # Google Artifact Registry repository name # Replace with your artifact registry repository name From e995d10083e0f05c1290d6bd9970aa50f48466a7 Mon Sep 17 00:00:00 2001 From: Samuel Hornstein Date: Thu, 14 May 2026 20:57:26 -0700 Subject: [PATCH 4/7] fix: bundle bin/ into Docker and use container paths for workbench execution Apply learnings from AMR workbench conversion: add fastp/bowtie2/make to container, pin nextflow=24, COPY bin/ to /opt/amrplusplus/bin, replace all $baseDir/bin/ refs with container paths, and use ${task.cpus} instead of ${threads} in bwa/trimmomatic modules. Co-Authored-By: Claude Opus 4.6 --- envs/containers/Dockerfile | 17 ++++++++++++- modules/Alignment/bowtie2-for_AMRplusplus.nf | 2 +- modules/Alignment/bwa.nf | 26 ++++++++++---------- modules/Microbiome/kraken2.nf | 2 +- modules/Microbiome/kraken_and_bracken.nf | 2 +- modules/Resistome/resistome.nf | 12 ++++----- modules/Trimming/trimmomatic.nf | 4 +-- 7 files changed, 40 insertions(+), 25 deletions(-) diff --git a/envs/containers/Dockerfile b/envs/containers/Dockerfile index 12b2a90..ba8d795 100755 --- a/envs/containers/Dockerfile +++ b/envs/containers/Dockerfile @@ -20,6 +20,7 @@ RUN apt-get update -q && \ subversion \ wget \ g++ \ + make \ libarchive13 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* @@ -59,5 +60,19 @@ RUN set -x && \ find /opt/conda/ -follow -type f -name '*.js.map' -delete && \ /opt/conda/bin/conda clean -afy && \ /opt/conda/bin/conda install -c conda-forge mamba && \ - /opt/conda/bin/mamba install -c conda-forge -c bioconda git python=3.9 trimmomatic multiqc bwa samtools bedtools kraken2 multiqc fastqc krona bracken numpy pysam pandas biopython matplotlib nextflow && \ + /opt/conda/bin/mamba install -c conda-forge -c bioconda git python=3.9 trimmomatic multiqc bwa samtools bedtools kraken2 multiqc fastqc krona bracken numpy pysam pandas biopython matplotlib nextflow=24 fastp bowtie2 && \ conda clean --all + +# Copy AMR++ bin scripts into the container +COPY bin /opt/amrplusplus/bin +RUN chmod +x /opt/amrplusplus/bin/*.py && \ + chmod +x /opt/amrplusplus/bin/rarefaction && \ + chmod +x /opt/amrplusplus/bin/resistome + +# Clone AmrPlusPlus_SNP for SNP verification (replaces empty dir copied from bin/) +RUN rm -rf /opt/amrplusplus/bin/AmrPlusPlus_SNP && \ + git clone https://github.com/Isabella136/AmrPlusPlus_SNP.git /opt/amrplusplus/bin/AmrPlusPlus_SNP && \ + chmod -R 755 /opt/amrplusplus/bin/AmrPlusPlus_SNP + +# Add bin directory to PATH +ENV PATH="/opt/amrplusplus/bin:${PATH}" diff --git a/modules/Alignment/bowtie2-for_AMRplusplus.nf b/modules/Alignment/bowtie2-for_AMRplusplus.nf index f7b8dfc..f026f9a 100755 --- a/modules/Alignment/bowtie2-for_AMRplusplus.nf +++ b/modules/Alignment/bowtie2-for_AMRplusplus.nf @@ -145,6 +145,6 @@ process HostRemovalStats { path("host.removal.stats"), emit: combo_host_rm_stats """ - ${PYTHON3} $baseDir/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats + ${PYTHON3} /opt/amrplusplus/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats """ } diff --git a/modules/Alignment/bwa.nf b/modules/Alignment/bwa.nf index 1617558..cf0a59f 100755 --- a/modules/Alignment/bwa.nf +++ b/modules/Alignment/bwa.nf @@ -63,22 +63,22 @@ process bwa_align { script: if( deduped == "N") """ - ${BWA} mem ${indexfiles[0]} ${reads} -t ${threads} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' | \ - ${SAMTOOLS} sort -@ ${threads} -m 4G -o ${pair_id}_alignment_sorted.bam + ${BWA} mem ${indexfiles[0]} ${reads} -t ${task.cpus} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' | \ + ${SAMTOOLS} sort -@ ${task.cpus} -m 4G -o ${pair_id}_alignment_sorted.bam """ else if( deduped == "Y") """ - ${BWA} mem ${indexfiles[0]} ${reads} -t ${threads} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' > ${pair_id}_alignment.sam - ${SAMTOOLS} view -@ ${threads} -S -b ${pair_id}_alignment.sam > ${pair_id}_alignment.bam + ${BWA} mem ${indexfiles[0]} ${reads} -t ${task.cpus} -R '@RG\\tID:${pair_id}\\tSM:${pair_id}' > ${pair_id}_alignment.sam + ${SAMTOOLS} view -@ ${task.cpus} -S -b ${pair_id}_alignment.sam > ${pair_id}_alignment.bam rm ${pair_id}_alignment.sam - ${SAMTOOLS} sort -@ ${threads} -m 3G -n ${pair_id}_alignment.bam -o ${pair_id}_alignment_sorted.bam + ${SAMTOOLS} sort -@ ${task.cpus} -m 3G -n ${pair_id}_alignment.bam -o ${pair_id}_alignment_sorted.bam rm ${pair_id}_alignment.bam - ${SAMTOOLS} fixmate -@ ${threads} ${pair_id}_alignment_sorted.bam ${pair_id}_alignment_sorted_fix.bam - ${SAMTOOLS} sort -@ ${threads} -m 3G ${pair_id}_alignment_sorted_fix.bam -o ${pair_id}_alignment_sorted_fix.sorted.bam + ${SAMTOOLS} fixmate -@ ${task.cpus} ${pair_id}_alignment_sorted.bam ${pair_id}_alignment_sorted_fix.bam + ${SAMTOOLS} sort -@ ${task.cpus} -m 3G ${pair_id}_alignment_sorted_fix.bam -o ${pair_id}_alignment_sorted_fix.sorted.bam rm ${pair_id}_alignment_sorted_fix.bam ${SAMTOOLS} rmdup -S ${pair_id}_alignment_sorted_fix.sorted.bam ${pair_id}_alignment_dedup.bam rm ${pair_id}_alignment_sorted_fix.sorted.bam - ${SAMTOOLS} view -@ ${threads} -h -o ${pair_id}_alignment_dedup.sam ${pair_id}_alignment_dedup.bam + ${SAMTOOLS} view -@ ${task.cpus} -h -o ${pair_id}_alignment_dedup.sam ${pair_id}_alignment_dedup.bam rm ${pair_id}_alignment_dedup.sam """ else @@ -107,13 +107,13 @@ process bwa_rm_contaminant_fq { path("${pair_id}.samtools.idxstats"), emit: host_rm_stats """ - ${BWA} mem ${indexfiles[0]} ${reads[0]} ${reads[1]} -t ${threads} | \ - ${SAMTOOLS} sort -@ ${threads} -m 4G -o ${pair_id}.host.sorted.bam + ${BWA} mem ${indexfiles[0]} ${reads[0]} ${reads[1]} -t ${task.cpus} | \ + ${SAMTOOLS} sort -@ ${task.cpus} -m 4G -o ${pair_id}.host.sorted.bam ${SAMTOOLS} index ${pair_id}.host.sorted.bam && ${SAMTOOLS} idxstats ${pair_id}.host.sorted.bam > ${pair_id}.samtools.idxstats ${SAMTOOLS} view -h -f 12 -b ${pair_id}.host.sorted.bam -o ${pair_id}.host.sorted.removed.bam - ${SAMTOOLS} sort -n -@ ${threads} -m 3G ${pair_id}.host.sorted.removed.bam -o ${pair_id}.host.resorted.removed.bam + ${SAMTOOLS} sort -n -@ ${task.cpus} -m 3G ${pair_id}.host.sorted.removed.bam -o ${pair_id}.host.resorted.removed.bam ${SAMTOOLS} \ - fastq -@ ${threads} -c 6 \ + fastq -@ ${task.cpus} -c 6 \ ${pair_id}.host.resorted.removed.bam \ -1 ${pair_id}.non.host.R1.fastq.gz \ -2 ${pair_id}.non.host.R2.fastq.gz \ @@ -143,6 +143,6 @@ process HostRemovalStats { path("host.removal.stats"), emit: combo_host_rm_stats """ - ${PYTHON3} $baseDir/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats + ${PYTHON3} /opt/amrplusplus/bin/samtools_idxstats.py -i ${host_rm_stats} -o host.removal.stats """ } diff --git a/modules/Microbiome/kraken2.nf b/modules/Microbiome/kraken2.nf index 9219438..e7c9565 100755 --- a/modules/Microbiome/kraken2.nf +++ b/modules/Microbiome/kraken2.nf @@ -78,7 +78,7 @@ process krakenresults { path("kraken_analytic_matrix.csv") """ - ${PYTHON3} $baseDir/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv + ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide.py -i ${kraken_reports} -o kraken_analytic_matrix.csv """ } diff --git a/modules/Microbiome/kraken_and_bracken.nf b/modules/Microbiome/kraken_and_bracken.nf index f7f60a6..36a8466 100755 --- a/modules/Microbiome/kraken_and_bracken.nf +++ b/modules/Microbiome/kraken_and_bracken.nf @@ -104,7 +104,7 @@ process krakenresults { """ - ${PYTHON3} $baseDir/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv + ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv """ } diff --git a/modules/Resistome/resistome.nf b/modules/Resistome/resistome.nf index 3b987df..420f5ea 100755 --- a/modules/Resistome/resistome.nf +++ b/modules/Resistome/resistome.nf @@ -40,7 +40,7 @@ process build_dependencies { #mv rarefaction ../ #cd ../ #rm -rf rarefactionanalyzer - cp $baseDir/bin/rarefaction . + cp /opt/amrplusplus/bin/rarefaction . #git clone https://github.com/cdeanj/resistomeanalyzer.git @@ -50,7 +50,7 @@ process build_dependencies { #mv resistome ../ #cd ../ #rm -rf resistomeanalyzer - cp $baseDir/bin/resistome . + cp /opt/amrplusplus/bin/resistome . git clone https://github.com/Isabella136/AmrPlusPlus_SNP.git chmod -R 777 AmrPlusPlus_SNP/ @@ -119,7 +119,7 @@ process resistomeresults { path("${prefix}_analytic_matrix.csv"), emit: snp_count_matrix, optional: true """ - ${PYTHON3} $baseDir/bin/amr_long_to_wide.py -i ${resistomes} -o ${prefix}_analytic_matrix.csv + ${PYTHON3} /opt/amrplusplus/bin/amr_long_to_wide.py -i ${resistomes} -o ${prefix}_analytic_matrix.csv """ } @@ -189,7 +189,7 @@ process plotrarefaction { """ mkdir data/ mv *.tsv data/ - python $baseDir/bin/rfplot.py --dir ./data --nd --s --sd . + python /opt/amrplusplus/bin/rfplot.py --dir ./data --nd --s --sd . """ } @@ -219,7 +219,7 @@ process runsnp { path("${sample_id}.${prefix}_SNPs${sample_id}/*") """ - cp -r $baseDir/bin/AmrPlusPlus_SNP/* . + cp -r /opt/amrplusplus/bin/AmrPlusPlus_SNP/* . # change name to stay consistent with count matrix name, but only if the names don't match if [ "${bam}" != "${sample_id}.bam" ]; then @@ -257,7 +257,7 @@ process snpresults { """ - ${PYTHON3} $baseDir/bin/snp_long_to_wide.py -i ${snp_counts} -o SNPconfirmed_${prefix}_analytic_matrix.csv + ${PYTHON3} /opt/amrplusplus/bin/snp_long_to_wide.py -i ${snp_counts} -o SNPconfirmed_${prefix}_analytic_matrix.csv """ } diff --git a/modules/Trimming/trimmomatic.nf b/modules/Trimming/trimmomatic.nf index db61c18..a26869f 100755 --- a/modules/Trimming/trimmomatic.nf +++ b/modules/Trimming/trimmomatic.nf @@ -41,7 +41,7 @@ process runqc { """ ${TRIMMOMATIC} \ PE \ - -threads ${threads} \ + -threads ${task.cpus} \ ${reads[0]} ${reads[1]} ${sample_id}.1P.fastq.gz ${sample_id}.1U.fastq.gz ${sample_id}.2P.fastq.gz ${sample_id}.2U.fastq.gz \ ILLUMINACLIP:${adapters}:2:30:10:3:TRUE \ LEADING:${leading} \ @@ -73,6 +73,6 @@ process QCstats { path("trimmomatic.stats"), emit: combo_trim_stats """ - ${PYTHON3} $baseDir/bin/trimmomatic_stats.py -i ${stats} -o trimmomatic.stats + ${PYTHON3} /opt/amrplusplus/bin/trimmomatic_stats.py -i ${stats} -o trimmomatic.stats """ } From e40d30a2738ff627776bcbc3496f7012d14414eb Mon Sep 17 00:00:00 2001 From: Samuel Hornstein Date: Fri, 15 May 2026 10:23:44 -0700 Subject: [PATCH 5/7] chore: rename container image from amrplusplus-workbench to flores-workbench Co-Authored-By: Claude Opus 4.6 --- nextflow.config | 4 ++-- wb/config/gcp.env | 2 +- wb/config/gcp.env.template | 2 +- wb/config/local.env | 4 ++-- wb/config/local.env.template | 4 ++-- wb/config/wb.env | 2 +- wb/config/wb.env.template | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/nextflow.config b/nextflow.config index 785fa3d..6d66c20 100755 --- a/nextflow.config +++ b/nextflow.config @@ -75,7 +75,7 @@ profiles { 'google-batch' { includeConfig "config/google_batch.config" process.executor = 'google-batch' - process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/amrplusplus-workbench:latest" + process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/flores-workbench:latest" process.errorStrategy = { task.exitStatus==50001 ? 'retry' : 'terminate' } process.maxRetries = 2 @@ -99,7 +99,7 @@ profiles { workbench { includeConfig "config/google_batch.config" process.executor = 'google-batch' - process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/amrplusplus-workbench:latest" + process.container = "us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/flores-workbench:latest" process.errorStrategy = { task.exitStatus==50001 ? 'retry' : 'terminate' } process.maxRetries = 5 diff --git a/wb/config/gcp.env b/wb/config/gcp.env index 75756fc..ff8dbd4 100644 --- a/wb/config/gcp.env +++ b/wb/config/gcp.env @@ -32,7 +32,7 @@ export GOOGLE_SERVICE_ACCOUNT_NAME=nextflow-runner export GOOGLE_SERVICE_ACCOUNT_EMAIL="${GOOGLE_SERVICE_ACCOUNT_NAME}@${GOOGLE_CLOUD_PROJECT}.iam.gserviceaccount.com" # Docker image configuration (auto-generated paths) -IMAGE_NAME="amrplusplus-workbench" +IMAGE_NAME="flores-workbench" IMAGE_TAG="latest" REGISTRY_PATH="us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}" diff --git a/wb/config/gcp.env.template b/wb/config/gcp.env.template index 79032ee..609e78c 100644 --- a/wb/config/gcp.env.template +++ b/wb/config/gcp.env.template @@ -32,7 +32,7 @@ export GOOGLE_SERVICE_ACCOUNT_NAME=nextflow-runner export GOOGLE_SERVICE_ACCOUNT_EMAIL="${GOOGLE_SERVICE_ACCOUNT_NAME}@${GOOGLE_CLOUD_PROJECT}.iam.gserviceaccount.com" # Docker image configuration (auto-generated paths) -IMAGE_NAME="amrplusplus-workbench" +IMAGE_NAME="flores-workbench" IMAGE_TAG="latest" REGISTRY_PATH="us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}" diff --git a/wb/config/local.env b/wb/config/local.env index ecd7915..a1df9f3 100644 --- a/wb/config/local.env +++ b/wb/config/local.env @@ -7,8 +7,8 @@ # Docker image configuration # Replace with your Docker Hub username -# Example: "johndoe/amrplusplus-workbench" -IMAGE_NAME="passdan/amrplusplus-workbench" +# Example: "johndoe/flores-workbench" +IMAGE_NAME="passdan/flores-workbench" ############################################################################### # AUTOMATIC CONFIGURATION - DO NOT MODIFY diff --git a/wb/config/local.env.template b/wb/config/local.env.template index cd78e5b..cba30b5 100644 --- a/wb/config/local.env.template +++ b/wb/config/local.env.template @@ -7,8 +7,8 @@ # Docker image configuration # Replace with your Docker Hub username -# Example: "johndoe/amrplusplus-workbench" -IMAGE_NAME="/amrplusplus-workbench" +# Example: "johndoe/flores-workbench" +IMAGE_NAME="/flores-workbench" ############################################################################### # AUTOMATIC CONFIGURATION - DO NOT MODIFY diff --git a/wb/config/wb.env b/wb/config/wb.env index 69c153b..ee4fb7b 100644 --- a/wb/config/wb.env +++ b/wb/config/wb.env @@ -35,7 +35,7 @@ export GOOGLE_SERVICE_ACCOUNT_EMAIL=$(wb auth status 2>&1 | grep "Service accoun export GOOGLE_SERVICE_ACCOUNT_NAME=$(echo "${GOOGLE_SERVICE_ACCOUNT_EMAIL}" | cut -d'@' -f1) # Docker image configuration (auto-generated paths) -IMAGE_NAME="amrplusplus-workbench" +IMAGE_NAME="flores-workbench" IMAGE_TAG="latest" REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}" diff --git a/wb/config/wb.env.template b/wb/config/wb.env.template index 4c807a1..bbea4c7 100644 --- a/wb/config/wb.env.template +++ b/wb/config/wb.env.template @@ -39,7 +39,7 @@ export GOOGLE_SERVICE_ACCOUNT_EMAIL=$(wb auth status 2>&1 | grep "Service accoun export GOOGLE_SERVICE_ACCOUNT_NAME=$(echo "${GOOGLE_SERVICE_ACCOUNT_EMAIL}" | cut -d'@' -f1) # Docker image configuration (auto-generated paths) -IMAGE_NAME="amrplusplus-workbench" +IMAGE_NAME="flores-workbench" IMAGE_TAG="latest" REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}" From 740030934906c2717ded741876598d77c3392b5e Mon Sep 17 00:00:00 2001 From: Sam Hornstein Date: Fri, 15 May 2026 22:18:44 +0000 Subject: [PATCH 6/7] fix: resolve workbench pipeline failures and add pre-built host index - Add resource overrides for bowtie2_index and bwa index processes (n2-highmem-8, 64GB) to prevent OOM kills on full genome builds - Fix multiqc output naming for newer multiqc versions by adding --outdir and --filename flags - Add errorStrategy 'ignore' to runbracken for empty taxonomic levels - Add Domain ('D') taxonomic level to kraken2_long_to_wide_update.py - Use Nextflow-uploaded bin/ scripts instead of container-baked paths in krakenresults process - Fix GCS glob handling in host index loading by removing Paths.get() - Add pre-built GRCh38 bowtie2 host index to skip 50-min build step Co-Authored-By: Claude Opus 4.6 (1M context) --- bin/kraken2_long_to_wide_update.py | 34 +++++++++++++----------- config/google_batch.config | 12 +++++++++ modules/Fastqc/fastqc.nf | 5 ++-- modules/Microbiome/kraken_and_bracken.nf | 5 ++-- params_google_batch.config | 4 +-- subworkflows/fastq_host_removal.nf | 4 +-- 6 files changed, 39 insertions(+), 25 deletions(-) diff --git a/bin/kraken2_long_to_wide_update.py b/bin/kraken2_long_to_wide_update.py index 23a15e7..afcee85 100755 --- a/bin/kraken2_long_to_wide_update.py +++ b/bin/kraken2_long_to_wide_update.py @@ -13,28 +13,30 @@ 'R1':0, 'R2':1, 'R3':2, - 'K': 3, - 'P': 4, - 'C': 5, - 'O': 6, - 'F': 7, - 'G': 8, - 'S': 9, - 'U': 10 + 'D': 3, + 'K': 4, + 'P': 5, + 'C': 6, + 'O': 7, + 'F': 8, + 'G': 9, + 'S': 10, + 'U': 11 } taxa_level_names = { 0: 'Root1', 1: 'Root2', 2: 'Root3', - 3: 'Kingdom', - 4: 'Phylum', - 5: 'Class', - 6: 'Order', - 7: 'Family', - 8: 'Genus', - 9: 'Species', - 10: 'Unclassified' + 3: 'Domain', + 4: 'Kingdom', + 5: 'Phylum', + 6: 'Class', + 7: 'Order', + 8: 'Family', + 9: 'Genus', + 10: 'Species', + 11: 'Unclassified' } diff --git a/config/google_batch.config b/config/google_batch.config index 0457262..335876b 100644 --- a/config/google_batch.config +++ b/config/google_batch.config @@ -23,6 +23,18 @@ process { containerOptions = '--env _JAVA_OPTIONS="-Xmx60g"' } + withName: 'bowtie2_index' { + cpus = 8 + memory = '64 GB' + machineType = 'n2-highmem-8' + } + + withName: 'index' { + cpus = 8 + memory = '64 GB' + machineType = 'n2-highmem-8' + } + withName: 'bowtie2_align' { cpus = 32 memory = '128 GB' diff --git a/modules/Fastqc/fastqc.nf b/modules/Fastqc/fastqc.nf index bf3c8d0..893d592 100755 --- a/modules/Fastqc/fastqc.nf +++ b/modules/Fastqc/fastqc.nf @@ -51,7 +51,8 @@ process multiqc { script: """ cp $config/* . - multiqc -v data* --interactive -f --cl-config "max_table_rows: 3000" - mv multiqc_data/multiqc_general_stats.txt . + multiqc -v data* --interactive -f --cl-config "max_table_rows: 3000" --outdir multiqc_data --filename multiqc_report.html + mv multiqc_data/multiqc_report_data/multiqc_general_stats.txt . + mv multiqc_data/multiqc_report.html . """ } diff --git a/modules/Microbiome/kraken_and_bracken.nf b/modules/Microbiome/kraken_and_bracken.nf index 36a8466..6c73264 100755 --- a/modules/Microbiome/kraken_and_bracken.nf +++ b/modules/Microbiome/kraken_and_bracken.nf @@ -104,13 +104,14 @@ process krakenresults { """ - ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv + ${PYTHON3} \$HOME/.nextflow-bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv """ } process runbracken { label "microbiome" - + errorStrategy 'ignore' + input: tuple val(sample_id), path(kraken_report), val(level) path(krakendb) diff --git a/params_google_batch.config b/params_google_batch.config index ce1989d..da16807 100644 --- a/params_google_batch.config +++ b/params_google_batch.config @@ -13,7 +13,7 @@ params { // ----------------------------------------------------------------- // Input Data // ----------------------------------------------------------------- - reads = "gs://${gcs_bucket}/rawdata-wb-farms/S358_MiSeq_BHWNTNDRX5/fastq/C1*{1,2}.f*q.gz" + reads = "gs://${gcs_bucket}/data/raw/*_R{1,2}.fastq.gz" bam_files = null split = "" @@ -21,7 +21,7 @@ params { // Reference Databases // ----------------------------------------------------------------- host = "gs://${gcs_ref_bucket}/grch38_1kgmaj.fa" - host_index = null + host_index = "gs://${gcs_ref_bucket}/grch38_1kgmaj{.fa,*.bt2}" kraken_db = "gs://${gcs_ref_bucket}/minikraken_8GB_20200312" diff --git a/subworkflows/fastq_host_removal.nf b/subworkflows/fastq_host_removal.nf index c93ecc0..cfaacd6 100755 --- a/subworkflows/fastq_host_removal.nf +++ b/subworkflows/fastq_host_removal.nf @@ -19,9 +19,7 @@ workflow FASTQ_RM_HOST_WF { reference_index_files = bowtie2_index.out } else { reference_index_files = Channel - .fromPath(Paths.get(params.host_index)) - .map { file(it.toString()) } - .filter { file(it).exists() } + .fromPath(params.host_index) .toList() .map { files -> if (files.size() < 6) { From ad7d59f6774c6c552cb27ccff5735bba1a18d67e Mon Sep 17 00:00:00 2001 From: Sam Hornstein Date: Fri, 15 May 2026 22:38:17 +0000 Subject: [PATCH 7/7] fix: address PR review feedback - Use consistent container path for krakenresults script (/opt/amrplusplus/bin/ instead of $HOME/.nextflow-bin/) - Gitignore wb.env and gcp.env (workspace-specific config) - Add setup instructions to wb.env.template - Add helpful error message when env file is missing - Make runbracken errorStrategy targeted to exit code 1 only Note: container image must be rebuilt to include the Domain 'D' fix in kraken2_long_to_wide_update.py for krakenresults to work. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 3 ++ modules/Microbiome/kraken_and_bracken.nf | 4 +-- wb/config/gcp.env | 41 ---------------------- wb/config/wb.env | 44 ------------------------ wb/config/wb.env.template | 6 ++++ wb/run.sh | 1 + 6 files changed, 12 insertions(+), 87 deletions(-) delete mode 100644 wb/config/gcp.env delete mode 100644 wb/config/wb.env diff --git a/.gitignore b/.gitignore index 6ca4a88..8551b05 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ envs/amrplusplus-update.sif logs/ .nextflow* nextflow-24.10.9-dist +trace-*.txt +wb/config/wb.env +wb/config/gcp.env diff --git a/modules/Microbiome/kraken_and_bracken.nf b/modules/Microbiome/kraken_and_bracken.nf index 6c73264..bd511e2 100755 --- a/modules/Microbiome/kraken_and_bracken.nf +++ b/modules/Microbiome/kraken_and_bracken.nf @@ -104,13 +104,13 @@ process krakenresults { """ - ${PYTHON3} \$HOME/.nextflow-bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv + ${PYTHON3} /opt/amrplusplus/bin/kraken2_long_to_wide_update.py -i ${kraken_reports} -o kraken_analytic_matrix.csv """ } process runbracken { label "microbiome" - errorStrategy 'ignore' + errorStrategy { task.exitStatus == 1 ? 'ignore' : 'terminate' } input: tuple val(sample_id), path(kraken_report), val(level) diff --git a/wb/config/gcp.env b/wb/config/gcp.env deleted file mode 100644 index ff8dbd4..0000000 --- a/wb/config/gcp.env +++ /dev/null @@ -1,41 +0,0 @@ -# GCP environment configuration -# This is used when running Nextflow on Google Batch with local orchestration -# Nextflow runs on your local machine, jobs execute on Google Batch - -############################################################################### -# USER CONFIGURATION - UPDATE THESE VALUES -############################################################################### - -# GCS bucket for storing pipeline data and results -# Replace with your GCS bucket name (without gs:// prefix) -# Example: "my-nextflow-data" -export GCS_BUCKET=mod-wb-mighty-tangerine-1678 - -# GCS bucket location/region -# Common values: us-central1, us-east1, europe-west1 -export GCS_BUCKET_LOCATION=europe-west2 - -# Google Artifact Registry repository name -# Replace with your artifact registry repository name -# Example: "nextflow-containers" -export GOOGLE_ARTIFACT_REPO=nextflow-containers - -############################################################################### -# AUTOMATIC CONFIGURATION - DO NOT MODIFY -############################################################################### - -# Google Cloud project (auto-detected from gcloud CLI) -export GOOGLE_CLOUD_PROJECT=$(gcloud config get project) - -# Service account configuration (auto-generated) -export GOOGLE_SERVICE_ACCOUNT_NAME=nextflow-runner -export GOOGLE_SERVICE_ACCOUNT_EMAIL="${GOOGLE_SERVICE_ACCOUNT_NAME}@${GOOGLE_CLOUD_PROJECT}.iam.gserviceaccount.com" - -# Docker image configuration (auto-generated paths) -IMAGE_NAME="flores-workbench" -IMAGE_TAG="latest" -REGISTRY_PATH="us-central1-docker.pkg.dev/${GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}" - -# Nextflow profile and config -NEXTFLOW_PROFILE="google_batch" -NEXTFLOW_CONFIG="params_google-batch.config" diff --git a/wb/config/wb.env b/wb/config/wb.env deleted file mode 100644 index ee4fb7b..0000000 --- a/wb/config/wb.env +++ /dev/null @@ -1,44 +0,0 @@ -# Workbench environment configuration -# This is used when running Nextflow on Google Batch with Workbench orchestration -# Both Nextflow orchestration and job execution happen in Workbench/Google Cloud - -############################################################################### -# USER CONFIGURATION - UPDATE THESE VALUES -############################################################################### - -# GCS bucket resource ID (created via Workbench) -# Replace with your Workbench GCS bucket resource ID -# Example: "nf-output" or "my-pipeline-data" -# Note: Use the resource ID, not the full GCS bucket name -export GCS_BUCKET=nf-output-wb-cagey-coconut-8353 -export GCS_REF_BUCKET=reference-genomes-wb-cagey-coconut-8353 - -# GCS bucket location/region -# Common values: us-central1, us-east1, europe-west1 -export GCS_BUCKET_LOCATION=us-central1 - -# Google Artifact Registry repository name -# Replace with your artifact registry repository name -# Example: "nextflow-containers" -export GOOGLE_ARTIFACT_REPO=nextflow-containers - -############################################################################### -# AUTOMATIC CONFIGURATION - DO NOT MODIFY -############################################################################### - -# Google Cloud project (auto-detected from Workbench workspace) -export WORKBENCH_GOOGLE_CLOUD_PROJECT=$(wb status 2>/dev/null | grep "Google project" | awk -F': ' '{print $2}' | xargs) -export GOOGLE_CLOUD_PROJECT="${WORKBENCH_GOOGLE_CLOUD_PROJECT}" - -# Service account configuration (Workbench Pet Service Account - auto-detected) -export GOOGLE_SERVICE_ACCOUNT_EMAIL=$(wb auth status 2>&1 | grep "Service account email" | awk -F': ' '{print $2}' | xargs) -export GOOGLE_SERVICE_ACCOUNT_NAME=$(echo "${GOOGLE_SERVICE_ACCOUNT_EMAIL}" | cut -d'@' -f1) - -# Docker image configuration (auto-generated paths) -IMAGE_NAME="flores-workbench" -IMAGE_TAG="latest" -REGISTRY_PATH="us-central1-docker.pkg.dev/${WORKBENCH_GOOGLE_CLOUD_PROJECT}/${GOOGLE_ARTIFACT_REPO}/${IMAGE_NAME}:${IMAGE_TAG}" - -# Nextflow profile and config -NEXTFLOW_PROFILE="workbench" -NEXTFLOW_CONFIG="params_google_batch.config" diff --git a/wb/config/wb.env.template b/wb/config/wb.env.template index bbea4c7..ab9011a 100644 --- a/wb/config/wb.env.template +++ b/wb/config/wb.env.template @@ -1,6 +1,12 @@ # Workbench environment configuration # This is used when running Nextflow on Google Batch with Workbench orchestration # Both Nextflow orchestration and job execution happen in Workbench/Google Cloud +# +# Setup: +# cp wb/config/wb.env.template wb/config/wb.env +# # Edit wb/config/wb.env and fill in the values below +# +# Note: wb.env is gitignored — each user/workspace needs its own copy. ############################################################################### # USER CONFIGURATION - UPDATE THESE VALUES diff --git a/wb/run.sh b/wb/run.sh index 27ee8bf..fbf3e1e 100755 --- a/wb/run.sh +++ b/wb/run.sh @@ -46,6 +46,7 @@ fi CONFIG_FILE="${SCRIPT_DIR}/config/${ENV}.env" if [[ ! -f "$CONFIG_FILE" ]]; then echo "Error: Configuration file not found: $CONFIG_FILE" + echo "Create it from the template: cp ${SCRIPT_DIR}/config/${ENV}.env.template ${CONFIG_FILE}" exit 1 fi