From 7c26f41d9c6e055f36d2439e8c90d9f62b7394f7 Mon Sep 17 00:00:00 2001 From: kopardev Date: Wed, 21 May 2025 11:25:28 -0400 Subject: [PATCH 1/8] fix: fix #100 --- workflow/scripts/DESeq2.Rmd | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/workflow/scripts/DESeq2.Rmd b/workflow/scripts/DESeq2.Rmd index 6aca44b..99d10c3 100755 --- a/workflow/scripts/DESeq2.Rmd +++ b/workflow/scripts/DESeq2.Rmd @@ -252,10 +252,21 @@ EnhancedVolcano::EnhancedVolcano(resdf_w_anno, ## Open ROIs ```{r updown,include=TRUE,echo=FALSE,cache=FALSE,warning=FALSE} +# sometimes FC and FDR are too strict and up_roi/down_roi can be zero +# to work with that scenario without errors: x=as.data.frame(rbind(table(up_roi$shortAnno),table(down_roi$shortAnno))) -rownames(x)=c(paste("Open in",params$contrast_numerator),paste("Open in",params$contrast_denominator)) -x$Total=rowSums(x) -DT::datatable(x,rownames = TRUE) -heatmap_matrix=assay(rld2) +if (nrow(x) == 2) { + rownames(x) <- c(paste("Open in", params$contrast_numerator), + paste("Open in", params$contrast_denominator)) + x$Total <- rowSums(x) + DT::datatable(x, rownames = TRUE) +} else if (nrow(x) > 0) { + rownames(x) <- paste("Group", seq_len(nrow(x))) + x$Total <- rowSums(x) + DT::datatable(x, rownames = TRUE) +} else { + cat("No significant up/down regions to summarize.\n") +} +# heatmap_matrix=assay(rld2) ``` From 3cd761367da748c715015433a76dba7174e96ec1 Mon Sep 17 00:00:00 2001 From: kopardev Date: Wed, 21 May 2025 11:26:56 -0400 Subject: [PATCH 2/8] refactor: remove stats folder handling, consolidate outputs under logs - Removed creation and use of the 'stats' folder - Moved all logging and metadata (e.g., runinfo.yaml, snakemake logs, jobby files) to the 'logs' folder - Updated comments and variable references accordingly - Deprecated create_runinfo as spooker handles it via ccbr_tools v0.4+ --- aspen | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/aspen 
b/aspen index 1eca11a..52755db 100755 --- a/aspen +++ b/aspen @@ -180,7 +180,7 @@ function init() { # This function initializes the workdir by: # 1. creating the working dir # 2. copying essential files like config.yaml and samples.tsv into the workdir -# 3. setting up logs and stats folders +# 3. setting up logs folder printbanner $ASPENVERSION @@ -224,9 +224,8 @@ done cd ${WORKDIR} -#create log and stats folders +#create log folder if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi -if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi cat << EOF Done Initializing : $WORKDIR @@ -440,7 +439,7 @@ function create_runinfo { modtime=$(stat ${WORKDIR}/runinfo.yaml 2>/dev/null|grep Modify|awk '{print $2,$3}'|awk -F"." '{print $1}'|sed "s/ //g"|sed "s/-//g"|sed "s/://g") fi if [ -f ${WORKDIR}/runinfo.yaml ];then - mv ${WORKDIR}/runinfo.yaml ${WORKDIR}/stats/runinfo.${modtime}.yaml + mv ${WORKDIR}/runinfo.yaml ${WORKDIR}/logs/runinfo.${modtime}.yaml fi echo "Pipeline Dir: $PIPELINE_HOME" > ${WORKDIR}/runinfo.yaml echo "Git Commit/Tag: $GIT_COMMIT_TAG" >> ${WORKDIR}/runinfo.yaml @@ -459,7 +458,7 @@ function create_runinfo { } ########################################################################################## -# PRERUN CLEANUP ... get ready to run .. park old logs/stats etc. +# PRERUN CLEANUP ... get ready to run .. park old logs ########################################################################################## function preruncleanup() { @@ -475,12 +474,14 @@ function preruncleanup() { if [ -f ${WORKDIR}/snakemake.log ];then modtime=$(stat ${WORKDIR}/snakemake.log |grep Modify|awk '{print $2,$3}'|awk -F"." 
'{print $1}'|sed "s/ //g"|sed "s/-//g"|sed "s/://g") mv ${WORKDIR}/snakemake.log ${WORKDIR}/logs/snakemake.${modtime}.log - if [ -f ${WORKDIR}/snakemake.log.HPC_summary.txt ];then - mv ${WORKDIR}/snakemake.log.HPC_summary.txt ${WORKDIR}/stats/snakemake.${modtime}.log.HPC_summary.txt + if [ -f ${WORKDIR}/snakemake.log.jobby ];then + mv ${WORKDIR}/snakemake.log.jobby ${WORKDIR}/logs/snakemake.${modtime}.log.jobby fi - if [ -f ${WORKDIR}/snakemake.stats ];then - mv ${WORKDIR}/snakemake.stats ${WORKDIR}/stats/snakemake.${modtime}.stats + if [ -f ${WORKDIR}/snakemake.log.jobby.short ];then + mv ${WORKDIR}/snakemake.log.jobby.short ${WORKDIR}/logs/snakemake.${modtime}.log.jobby.short fi + if [ -f ${WORKDIR}/runslurm_snakemake_report.html ];then + mv ${WORKDIR}/runslurm_snakemake_report.html ${WORKDIR}/logs/runslurm_snakemake_report.${modtime}.html if [ -f ${WORKDIR}/run_git_commit.txt ];then mv ${WORKDIR}/run_git_commit.txt ${WORKDIR}/logs/run_git_commit.${modtime}.txt fi @@ -490,7 +491,8 @@ function preruncleanup() { for f in $(ls ${WORKDIR}/slurm-*.out);do mv ${f} ${WORKDIR}/logs/;done fi - create_runinfo $modtime + # runinfo not needed as now handled by spooker from ccbr_tools v0.4+ + # create_runinfo $modtime } @@ -503,10 +505,11 @@ function run() { # 3. unlock or # 4. slurm +echo "Git Commit/Tag: $GIT_COMMIT_TAG" > ${WORKDIR}/run_git_commit.txt + ########################################################################################## # local run ########################################################################################## - if [ "$1" == "local" ];then preruncleanup From f319a4db595930f8e1810bf060d37b6e6060b83a Mon Sep 17 00:00:00 2001 From: kopardev Date: Wed, 21 May 2025 11:29:10 -0400 Subject: [PATCH 3/8] refactor: Replaced 'run_jobby_on_snakemake_log' with direct 'jobby' call; Dropped 'logs/' prefix from snakemake log paths ... 
these are outside logs folder as previous versions --- workflow/Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index db75fe8..faf1515 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -73,12 +73,12 @@ rule all: expand(join(PEAKSDIR, "{peakcaller}", "DiffATAC", "{method}", "all_diff_atacs.tsv"), method=COUNTING_METHODS, peakcaller=PEAKCALLERS) if CONTRASTS.shape[0] > 0 else [], on_complete = f""" -for cmd in spooker run_jobby_on_snakemake_log; do +for cmd in spooker jobby; do if ! command -v $cmd 2>&1 >/dev/null; then export PATH="$PATH:{config['ccbr_tools_path']}" fi done -jobby --tsv logs/snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short +jobby --tsv snakemake.log | tee snakemake.log.jobby | cut -f2,3,18 > snakemake.log.jobby.short spooker --outdir {WORKDIR} \ --name {config['pipeline']} \ --version {config['version']} \ From e2232afc44d3fda55bf2ef3fbe9bebd79f50da86 Mon Sep 17 00:00:00 2001 From: kopardev Date: Wed, 21 May 2025 11:42:36 -0400 Subject: [PATCH 4/8] fix: adding missing fi --- aspen | 1 + 1 file changed, 1 insertion(+) diff --git a/aspen b/aspen index 52755db..0d063f8 100755 --- a/aspen +++ b/aspen @@ -482,6 +482,7 @@ function preruncleanup() { fi if [ -f ${WORKDIR}/runslurm_snakemake_report.html ];then mv ${WORKDIR}/runslurm_snakemake_report.html ${WORKDIR}/logs/runslurm_snakemake_report.${modtime}.html + fi if [ -f ${WORKDIR}/run_git_commit.txt ];then mv ${WORKDIR}/run_git_commit.txt ${WORKDIR}/logs/run_git_commit.${modtime}.txt fi From 74eda112e5ba0c57fc06654d6ef7e699ef34d84b Mon Sep 17 00:00:00 2001 From: kopardev Date: Wed, 21 May 2025 11:43:55 -0400 Subject: [PATCH 5/8] fix: Replaced literal 'PIPELINE_HOME' with '${PIPELINE_HOME}' in fastq paths for samples.tsv ... 
used for testing --- config/samples.tsv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/samples.tsv b/config/samples.tsv index 15f7ef2..f8d122f 100644 --- a/config/samples.tsv +++ b/config/samples.tsv @@ -1,5 +1,5 @@ replicateName sampleName path_to_R1_fastq path_to_R2_fastq -D4_Meso_iCre_Dox_1 D4_Meso_iCre_Dox PIPELINE_HOME/.test/rawdata/D4_Meso_iCre_Dox_1.subset.R1.fastq.paired.fq.gz PIPELINE_HOME/.test/rawdata/D4_Meso_iCre_Dox_1.subset.R2.fastq.paired.fq.gz -D4_Meso_iCre_Dox_2 D4_Meso_iCre_Dox PIPELINE_HOME/.test/rawdata/D4_Meso_iCre_Dox_2.subset.R1.fastq.paired.fq.gz PIPELINE_HOME/.test/rawdata/D4_Meso_iCre_Dox_2.subset.R2.fastq.paired.fq.gz -iCre_D0_1 iCre_D0 PIPELINE_HOME/.test/rawdata/iCre_D0_1.subset.R1.fastq.paired.fq.gz PIPELINE_HOME/.test/rawdata/iCre_D0_1.subset.R2.fastq.paired.fq.gz -iCre_D0_2 iCre_D0 PIPELINE_HOME/.test/rawdata/iCre_D0_2.subset.R1.fastq.paired.fq.gz PIPELINE_HOME/.test/rawdata/iCre_D0_2.subset.R2.fastq.paired.fq.gz +D4_Meso_iCre_Dox_1 D4_Meso_iCre_Dox ${PIPELINE_HOME}/.test/rawdata/D4_Meso_iCre_Dox_1.subset.R1.fastq.paired.fq.gz ${PIPELINE_HOME}/.test/rawdata/D4_Meso_iCre_Dox_1.subset.R2.fastq.paired.fq.gz +D4_Meso_iCre_Dox_2 D4_Meso_iCre_Dox ${PIPELINE_HOME}/.test/rawdata/D4_Meso_iCre_Dox_2.subset.R1.fastq.paired.fq.gz ${PIPELINE_HOME}/.test/rawdata/D4_Meso_iCre_Dox_2.subset.R2.fastq.paired.fq.gz +iCre_D0_1 iCre_D0 ${PIPELINE_HOME}/.test/rawdata/iCre_D0_1.subset.R1.fastq.paired.fq.gz ${PIPELINE_HOME}/.test/rawdata/iCre_D0_1.subset.R2.fastq.paired.fq.gz +iCre_D0_2 iCre_D0 ${PIPELINE_HOME}/.test/rawdata/iCre_D0_2.subset.R1.fastq.paired.fq.gz ${PIPELINE_HOME}/.test/rawdata/iCre_D0_2.subset.R2.fastq.paired.fq.gz From 23cdb79c499021fc2d5cbeb31245cdda8fdf09ec Mon Sep 17 00:00:00 2001 From: kopardev Date: Wed, 21 May 2025 11:54:02 -0400 Subject: [PATCH 6/8] docs: CHANGELOG updates --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5ededd..680db42 100644 
--- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ ## ASPEN development version +- Fix Diffatac error (#100, @kopardev): Adds a defensive check to prevent invalid 'row.names' length error when up_roi or down_roi are empty (due to strict FC/FDR thresholds in DiffATAC) +- Minor refactoring to accomodate moving to ccbr_tools v0.4+ + ## ASPEN 1.1.0 This version features a major overhaul of the pipeline with changes in the following areas: From 8c2c0ab0f09537b9552c66dcf201de84c083f7b5 Mon Sep 17 00:00:00 2001 From: kopardev Date: Wed, 21 May 2025 16:36:00 -0400 Subject: [PATCH 7/8] docs: update outputs.md with new log and metadata files: - Added dryrun_git_commit.txt and run_git_commit.txt to the output file list and table. - Documented snakemake.log.jobby and snakemake.log.jobby.short as additional log outputs. - Expanded description of logs folder to reflect storage of older runinfo.yaml and snakemake.stats files. - Removed obsolete stats folder reference. --- docs/outputs.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/outputs.md b/docs/outputs.md index 1795070..c1db587 100644 --- a/docs/outputs.md +++ b/docs/outputs.md @@ -9,10 +9,12 @@ WORKDIR ├── cluster.json ├── config.yaml ├── contrasts.tsv +├── dryrun_git_commit.txt ├── dryrun.log ├── fastqs ├── logs ├── results +├── run_git_commit.txt ├── runinfo.yaml ├── runslurm_snakemake_report.html ├── sampleinfo.txt @@ -20,8 +22,9 @@ WORKDIR ├── scripts ├── slurm-XXXXXXX.out ├── snakemake.log +├── snakemake.log.jobby +├── snakemake.log.jobby.short ├── snakemake.stats -├── stats ├── submit_script.sbatch └── tools.yaml ``` @@ -36,17 +39,17 @@ Here are more details about these files: | `dryrun_git_commit.txt` | TXT | dryrun | The git commit hash of the version of ASPEN used at dryrun | | `dryrun.log` | TXT | dryrun | Log from `-m=dryrun` | | `fastqs` | FOLDER | dryrun | Folder containing symlinks to raw data | -| `logs` | FOLDER | dryrun | Folder containing all logs including Slurm 
`.out` and `.err` files | +| `logs` | FOLDER | dryrun | Folder containing all logs including Slurm `.out` and `.err` files. Also contains older timestamped `runinfo.yaml` and `snakemake.stats` files. | | `results` | FOLDER | Created at dryrun but populated during run | Main outputs folder | | `runinfo.yaml` | YAML | After completion of run | Metadata about the run executor, etc. | | `runslurm_snakemake_report.html` | HTML | After completion of run | HTML report including DAG and resource utilization | | `sampleinfo.txt` | TXT | dryrun, run | Tab-delimited mappings between `replicateNames` and `sampleNames` | | `samples.tsv` | TSV | init; can be edited later | Tab-delimited manifest with `replicateName`, `sampleName`, `path_to_R1_fastq`, `path_to_R2_fastq`. This file has a header. | | `scripts` | FOLDER | init | Folder keeps local copy of scripts called by various rules | +| `run_git_commit.txt` | TXT | run | The git commit hash of the version of ASPEN used at run | | `slurm-XXXXXXX.out` | TXT | run | Slurm `.out` file for the master job | | `snakemake.log` | TXT | run | Snakemake `.log` file for the master job; older copies timestamped and moved into `logs` folder | | `snakemake.stats` | JSON | run | per rule runtime stats | -| `stats` | FOLDER | Created at dryrun but populated during run | Contains older timestamped `runinfo.yaml` and `snakemake.stats` files | | `submit_script.sbatch` | TXT | run | Slurm script to kickstart the main Snakemake job | | `tools.yaml` | YAML | run | YAML containing the version of tools used in the pipeline (obsolete; was used to load specific module versions prior to moving over to Docker/Singularity containers) | From cbd1ac812c96ef8329f547039408ff3106320251 Mon Sep 17 00:00:00 2001 From: "Kelly Sovacool, PhD" Date: Wed, 21 May 2025 16:48:02 -0400 Subject: [PATCH 8/8] chore: Update CHANGELOG.md --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 680db42..98c4891 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,8 @@ ## ASPEN development version -- Fix Diffatac error (#100, @kopardev): Adds a defensive check to prevent invalid 'row.names' length error when up_roi or down_roi are empty (due to strict FC/FDR thresholds in DiffATAC) -- Minor refactoring to accomodate moving to ccbr_tools v0.4+ +- Fix Diffatac error (#101, @kopardev) + - Adds a defensive check to prevent invalid 'row.names' length error when up_roi or down_roi are empty (due to strict FC/FDR thresholds in DiffATAC) +- Minor refactoring to accommodate moving to ccbr_tools >= v0.4 (#101, @kopardev) ## ASPEN 1.1.0