From 0f13d6f170d0201bbc8b28b58988faf6ae281fd6 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 16 May 2025 11:03:07 -0400 Subject: [PATCH 1/4] feat: update spooker & jobby for ccbr_tools v0.4 --- config/config.yaml | 3 ++- workflow/Snakefile | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 07d3cef..621b0d0 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -205,5 +205,6 @@ ucscdocker: "docker://nciccbr/ccbr_ucsc_v385:v2-feat" # pipeline information pipeline: "ASPEN" version: "$ASPENVERSION" +pipeline_home: "$PIPELINE_HOME" -ccbr_tools_path: "/data/CCBR_Pipeliner/Tools/ccbr_tools/v0.2/bin/" +ccbr_tools_path: "/data/CCBR_Pipeliner/Tools/ccbr_tools/v0.4/bin/" diff --git a/workflow/Snakefile b/workflow/Snakefile index dccb72f..d95558b 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -78,8 +78,8 @@ for cmd in spooker run_jobby_on_snakemake_log; do export PATH="$PATH:{config['ccbr_tools_path']}" fi done -run_jobby_on_snakemake_log logs/snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short -spooker {WORKDIR} {config['pipeline']} {config['version']} +jobby --tsv logs/snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short +spooker {WORKDIR} {config['pipeline']} {config['version']} {config['pipeline_home']} > logs/spooker.log 2>&1 """ onsuccess: From 5b2c718f51b67977221a72ab43ef1a6c7638b489 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 16 May 2025 11:05:37 -0400 Subject: [PATCH 2/4] chore: update CHANGELOG.md --- CHANGELOG.md | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10175d0..3f06bad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,29 +1,27 @@ ## ASPEN development version - - This version features a major overhaul of the pipeline with changes in the following areas: ### Spike-in alignment (#94, @kopardev) - - Added support for spike-in alignment and scaling factor computation. (#94, @kopardev) - - This new feature is controlled by two new parameters in the config file: `spikein` and `spikein_genome`. (#69) - -### Peak-calling (#94, @kopardev) +- Added support for spike-in alignment and scaling factor computation. (#94, @kopardev) +- This new feature is controlled by two new parameters in the config file: `spikein` and `spikein_genome`. (#69) + +### Peak-calling (#94, @kopardev) - - Peak-called narrowPeak files are now q-value filtered by default, with a default q-value threshold of 0.1. Unfiltered files are still available for users who want to apply their own filters. (#90) - - Streamlined the output directory structure. - - Added the name of the peak-caller to ROI filenames (#86) - - Added missing annotations (#79) +- Peak-called narrowPeak files are now q-value filtered by default, with a default q-value threshold of 0.1. Unfiltered files are still available for users who want to apply their own filters. (#90) +- Streamlined the output directory structure. +- Added the name of the peak-caller to ROI filenames (#86) +- Added missing annotations (#79) ### Differential accessibility (#94, @kopardev) - - Add new rules for scaling counts and annotating regions of interest. (#68) - - DiffATAC analysis is now run for both MACS2 and Genrich peak calls, with results stored in separate directories. - - DiffATAC analysis now includes spike-in scaling factors when `spikein` is `TRUE`. - - Removed redundant steps in the differential accessibility analysis to streamline the process. - - create Tn5-based and reads-based counts matrices (#67) - - create spike-in scaled counts matrices (#62) +- Add new rules for scaling counts and annotating regions of interest. (#68) +- DiffATAC analysis is now run for both MACS2 and Genrich peak calls, with results stored in separate directories. +- DiffATAC analysis now includes spike-in scaling factors when `spikein` is `TRUE`. +- Removed redundant steps in the differential accessibility analysis to streamline the process. +- create Tn5-based and reads-based counts matrices (#67) +- create spike-in scaled counts matrices (#62) - Quality control - Updated FRiP calculation to use `tagAlign.gz` files instead of deduplicated BAM files. - Removed unnecessary QC metrics and simplified the QC workflow. @@ -31,13 +29,14 @@ This version features a major overhaul of the pipeline with changes in the follo ### Output directory (#94, @kopardev) - - Consolidated peak calling outputs into a single directory for each peak caller. (#91) - - Simplified the output directory structure. (#92) - - Decreased output digital footprint by removing unwanted intermediate files, gzipping annotated files, etc. (#87) +- Consolidated peak calling outputs into a single directory for each peak caller. (#91) +- Simplified the output directory structure. (#92) +- Decreased output digital footprint by removing unwanted intermediate files, gzipping annotated files, etc. (#87) +- Improved slurm job logging with jobby (now depends on ccbr_tools v0.4). (#98, @kelly-sovacool) ### Documentation (#94, @kopardev) - - Simplified the documentation to focus on the core functionalities of the pipeline, as well as reflect all of the changes in this version. +- Simplified the documentation to focus on the core functionalities of the pipeline, as well as reflect all of the changes in this version. ## ASPEN 1.0.6 From 7ec8b28b79615bfa6b79c3432b57b3b52a9f81d2 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 19 May 2025 15:31:26 -0400 Subject: [PATCH 3/4] fix: remove conda activate --- bin/redirect | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/bin/redirect b/bin/redirect index 09350cb..f2610a7 100755 --- a/bin/redirect +++ b/bin/redirect @@ -17,34 +17,16 @@ TOOLDIR=$(dirname "$SCRIPTDIRNAME") hpc_name=$(scontrol show config | grep ClusterName | sed "s/^.*= //") # load conda if [[ $hpc_name == biowulf ]]; then - . "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" - conda activate py311 + module load singularity snakemake/7 elif [[ $hpc_name == fnlcr ]]; then - . "/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh" - conda activate py311 + module load singularity + export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH" else echo "You are NOT running on BIOWULF or on FRCE" echo "Please make sure that:" - echo " - py311 conda environment is activated" + echo " - python >= 3.11 is in PATH" echo " - singularity is in PATH" - echo " - snakemake is in PATH" -fi -# if not on biowulf or frce then -# use py311.environment.yml in resources folder to create the py311 conda environment -# and load py311 - -# load required modules -# if running somewhere other than biowulf or frce, then ensure that -# - singularity -# - snakemake -# are in PATH -if [[ $hpc_name == biowulf ]];then - module load singularity snakemake/7 -elif [[ $hpc_name == fnlcr ]];then - # snakemake module on FRCE does not work as expected - # use the conda installed version of snakemake instead - module load singularity - export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH" + echo " - snakemake = 7 is in PATH" fi ${TOOLDIR}/aspen "$@" || true From c5b7f08b7ac8f9825d32aa6b12d58cd6ab94900b Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 19 May 2025 15:32:21 -0400 Subject: [PATCH 4/4] chore: update spooker usage (https://github.com/CCBR/Tools/pull/85) --- workflow/Snakefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index d95558b..db75fe8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -79,7 +79,11 @@ for cmd in spooker run_jobby_on_snakemake_log; do fi done jobby --tsv logs/snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short -spooker {WORKDIR} {config['pipeline']} {config['version']} {config['pipeline_home']} > logs/spooker.log 2>&1 +spooker --outdir {WORKDIR} \ + --name {config['pipeline']} \ + --version {config['version']} \ + --path {config['pipeline_home']} \ + > logs/spooker.log 2>&1 """ onsuccess: