diff --git a/aspen b/aspen index c767af2..64ae457 100755 --- a/aspen +++ b/aspen @@ -40,7 +40,7 @@ EOF ########################################################################################## # ## setting PIPELINE_HOME -PIPELINE_HOME=$(readlink -f $(dirname "$0")) +export PIPELINE_HOME=$(readlink -f "$(dirname "$0")") # set snakefile SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile" @@ -58,10 +58,10 @@ PYTHONVERSION="python/3.10" SNAKEMAKEVERSION="snakemake" #SINGULARITYVERSION="singularity/3.7.4" SINGULARITYVERSION="singularity" -ASPENVERSION=$(head -n1 $VERSIONFILE|awk '{print $1}') +export ASPENVERSION=$(head -n1 "$VERSIONFILE"|awk '{print $1}') # set defaults -GENOME="hg38" +export GENOME="hg38" SUPPORTED_GENOMES="hg19 hg38 mm10 mmul10 bosTau9 hs1" # essential files @@ -193,9 +193,7 @@ mkdir -p $WORKDIR f="${PIPELINE_HOME}/config/config.yaml" echo "Copying essential file: $f" fbn=$(basename $f) -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/GENOME/${GENOME}/g" $f > $WORKDIR/$fbn +envsubst '$PIPELINE_HOME $WORKDIR $GENOME $ASPENVERSION' < "$f" > "$WORKDIR/$fbn" for f in ${PIPELINE_HOME}/resources/cluster.json ${PIPELINE_HOME}/resources/tools.yaml do @@ -215,9 +213,7 @@ if [[ "$MANIFEST_SUPPLIED" == "false" ]];then f=$MANIFEST echo "Copying essential file: $f" fbn=$(basename $f) -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/GENOME/${GENOME}/g" $f > $WORKDIR/$fbn +envsubst '$PIPELINE_HOME $WORKDIR $GENOME $ASPENVERSION' < "$f" > "$WORKDIR/$fbn" fi # copy essential folders @@ -296,10 +292,9 @@ function reconfig(){ # this is only for dev purposes when new key-value pairs are being added to the config file check_essential_files - sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/GENOME/${GENOME}/g" \ - ${PIPELINE_HOME}/config/config.yaml > $WORKDIR/config.yaml + envsubst '$PIPELINE_HOME $WORKDIR $GENOME $ASPENVERSION' \ + 
< "${PIPELINE_HOME}/config/config.yaml" \ + > "$WORKDIR/config.yaml" echo "$WORKDIR/config.yaml has been updated!" } @@ -716,7 +711,7 @@ function main(){ if [ ! -f $MANIFEST ];then err "File $MANIFEST does NOT exist!";fi ;; -g=*|--genome=*) - GENOME="${i#*=}" + export GENOME="${i#*=}" found=0 for g in $SUPPORTED_GENOMES;do if [[ "$GENOME" == "$g" ]];then @@ -739,7 +734,7 @@ function main(){ ;; esac done - WORKDIR=$(readlink -f "$WORKDIR") + export WORKDIR=$(readlink -f "$WORKDIR") MANIFEST_SUPPLIED="true" # if manifest is empty ... aka not supplied at cli if [[ -z $MANIFEST ]];then diff --git a/config/config.yaml b/config/config.yaml index 153c46f..9909372 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,20 +1,20 @@ ## you probably need to change or comment or uncomment some of these # # The working dir... output will be in the results subfolder of the workdir -workdir: "WORKDIR" +workdir: "$WORKDIR" # tools scriptsdir resourcesdir # to use the workdir version of tools.yaml comment out the following line -tools: "PIPELINE_HOME/resources/tools.yaml" +tools: "$PIPELINE_HOME/resources/tools.yaml" # to use the workdir version of cluster.json comment out the following line -# clusterjson: "PIPELINE_HOME/resources/cluster.json" +# clusterjson: "$PIPELINE_HOME/resources/cluster.json" # to use the workdir version of scriptsdir comment out the following line -# scriptsdir: "PIPELINE_HOME/workflow/scripts" -scriptsdir: "WORKDIR/scripts" +# scriptsdir: "$PIPELINE_HOME/workflow/scripts" +scriptsdir: "$WORKDIR/scripts" -resourcesdir: "PIPELINE_HOME/resources" +resourcesdir: "$PIPELINE_HOME/resources" # tab delimited samples file ... should have the following 4 columns # @@ -23,10 +23,10 @@ resourcesdir: "PIPELINE_HOME/resources" # multiple replicates may belong to the same sample # PE data is required! 
# -samplemanifest: "WORKDIR/samples.tsv" +samplemanifest: "$WORKDIR/samples.tsv" # uncomment the genome of interest -genome: "GENOME" +genome: "$GENOME" # genome: "hg38" # genome: "hg19" # genome: "mm10" @@ -43,7 +43,7 @@ multimapping: 4 fixed_width: 500 # contrasts info -contrasts: "WORKDIR/contrasts.tsv" +contrasts: "$WORKDIR/contrasts.tsv" contrasts_fc_cutoff: 2 contrasts_fdr_cutoff: 0.05 @@ -51,71 +51,71 @@ contrasts_fdr_cutoff: 0.05 hs1: indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/indexes" - blacklistFa: "PIPELINE_HOME/resources/blacklistFa/hs1.blacklist.fa.gz" - tssBed: "PIPELINE_HOME/resources/tssBed/hs1_tssbeds.tar.gz" - homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif" - mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz" + blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/hs1.blacklist.fa.gz" + tssBed: "$PIPELINE_HOME/resources/tssBed/hs1_tssbeds.tar.gz" + homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif" + mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz" effectiveGenomeSize: 3000000000 fripextra: - dhsbed: "PIPELINE_HOME/resources/frip/hs1.DHS.bed.gz" - promoterbed: "PIPELINE_HOME/resources/frip/hs1.promoters.bed.gz" - enhancerbed: "PIPELINE_HOME/resources/frip/hs1.enhancers.bed.gz" + dhsbed: "$PIPELINE_HOME/resources/frip/hs1.DHS.bed.gz" + promoterbed: "$PIPELINE_HOME/resources/frip/hs1.promoters.bed.gz" + enhancerbed: "$PIPELINE_HOME/resources/frip/hs1.enhancers.bed.gz" hg38: indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/indexes" - blacklistFa: "PIPELINE_HOME/resources/blacklistFa/hg38.blacklist.fa.gz" - tssBed: "PIPELINE_HOME/resources/tssBed/hg38_tssbeds.tar.gz" - homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif" - mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz" + 
blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/hg38.blacklist.fa.gz" + tssBed: "$PIPELINE_HOME/resources/tssBed/hg38_tssbeds.tar.gz" + homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif" + mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz" effectiveGenomeSize: 2700000000 fripextra: - dhsbed: "PIPELINE_HOME/resources/frip/hg38.DHS.bed.gz" - promoterbed: "PIPELINE_HOME/resources/frip/hg38.promoters.bed.gz" - enhancerbed: "PIPELINE_HOME/resources/frip/hg38.enhancers.bed.gz" + dhsbed: "$PIPELINE_HOME/resources/frip/hg38.DHS.bed.gz" + promoterbed: "$PIPELINE_HOME/resources/frip/hg38.promoters.bed.gz" + enhancerbed: "$PIPELINE_HOME/resources/frip/hg38.enhancers.bed.gz" hg19: indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/indexes" - blacklistFa: "PIPELINE_HOME/resources/blacklistFa/hg19.blacklist.fa.gz" - tssBed: "PIPELINE_HOME/resources/tssBed/hg19_tssbeds.tar.gz" - homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif" - mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz" + blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/hg19.blacklist.fa.gz" + tssBed: "$PIPELINE_HOME/resources/tssBed/hg19_tssbeds.tar.gz" + homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif" + mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz" effectiveGenomeSize: 2700000000 fripextra: - dhsbed: "PIPELINE_HOME/resources/frip/hg19.DHS.bed.gz" - promoterbed: "PIPELINE_HOME/resources/frip/hg19.promoters.bed.gz" - enhancerbed: "PIPELINE_HOME/resources/frip/hg19.enhancers.bed.gz" + dhsbed: "$PIPELINE_HOME/resources/frip/hg19.DHS.bed.gz" + promoterbed: "$PIPELINE_HOME/resources/frip/hg19.promoters.bed.gz" + enhancerbed: "$PIPELINE_HOME/resources/frip/hg19.enhancers.bed.gz" mm10: indexdir: 
"/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/indexes" - blacklistFa: "PIPELINE_HOME/resources/blacklistFa/mm10.blacklist.fa.gz" - tssBed: "PIPELINE_HOME/resources/tssBed/mm10_tssbeds.tar.gz" - homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_MOUSE_mono_homer_format_0.001.motif" - mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_MOUSE_mono_meme_format.tar.gz" + blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/mm10.blacklist.fa.gz" + tssBed: "$PIPELINE_HOME/resources/tssBed/mm10_tssbeds.tar.gz" + homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_MOUSE_mono_homer_format_0.001.motif" + mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_MOUSE_mono_meme_format.tar.gz" effectiveGenomeSize: 1870000000 fripextra: - dhsbed: "PIPELINE_HOME/resources/frip/mm10.DHS.bed.gz" - promoterbed: "PIPELINE_HOME/resources/frip/mm10.promoters.bed.gz" - enhancerbed: "PIPELINE_HOME/resources/frip/mm10.enhancers.bed.gz" + dhsbed: "$PIPELINE_HOME/resources/frip/mm10.DHS.bed.gz" + promoterbed: "$PIPELINE_HOME/resources/frip/mm10.promoters.bed.gz" + enhancerbed: "$PIPELINE_HOME/resources/frip/mm10.enhancers.bed.gz" mmul10: indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mmul10" - blacklistFa: "PIPELINE_HOME/resources/blacklistFa/mmul10.blacklist.fa.gz" - tssBed: "PIPELINE_HOME/resources/tssBed/mmul10_v108_tssbeds.tar.gz" - homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif" - mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz" + blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/mmul10.blacklist.fa.gz" + tssBed: "$PIPELINE_HOME/resources/tssBed/mmul10_v108_tssbeds.tar.gz" + homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif" + mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz" effectiveGenomeSize: 2000000000 fripextra: - promoterbed: 
"PIPELINE_HOME/resources/frip/mmul10.promoters.bed.gz" + promoterbed: "$PIPELINE_HOME/resources/frip/mmul10.promoters.bed.gz" bosTau9: indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/bosTau9" - blacklistFa: "PIPELINE_HOME/resources/blacklistFa/bosTau9.blacklist.fa.gz" - tssBed: "PIPELINE_HOME/resources/tssBed/bosTau9_v108_tssbeds.tar.gz" - homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif" - mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz" + blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/bosTau9.blacklist.fa.gz" + tssBed: "$PIPELINE_HOME/resources/tssBed/bosTau9_v108_tssbeds.tar.gz" + homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif" + mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz" effectiveGenomeSize: 2000000000 fripextra: - promoterbed: "PIPELINE_HOME/resources/frip/bosTau9.promoters.bed.gz" + promoterbed: "$PIPELINE_HOME/resources/frip/bosTau9.promoters.bed.gz" # MACS2 arguments/parameters for peak calling # annotatePeaks: True ensures annotation of called peaks using ChIPSeeker @@ -154,13 +154,13 @@ roi_min_spm: 2 # report customized using multiqc_atacseq_config.yaml multiqc: extraparams: "" - configfile: "PIPELINE_HOME/config/multiqc_atacseq_config.yaml" + configfile: "$PIPELINE_HOME/config/multiqc_atacseq_config.yaml" # set min peaks to use for jaccard calculations jaccard_min_peaks: 1000 # fastq screen configuration file -fastqscreen_config: "PIPELINE_HOME/config/fastq_screen_config.txt" +fastqscreen_config: "$PIPELINE_HOME/config/fastq_screen_config.txt" # URLs for containers # masterdocker: "docker://nciccbr/ccbr_atacseq:v0.1.29" @@ -170,3 +170,7 @@ fastqscreendocker: "docker://nciccbr/ccbr_fastq_screen_0.14.1:v1.0" # featurecounts: "docker://genomicpariscentre/featurecounts:1.5.3" featurecountsdocker: "docker://dsaha0295/featurecounts:latest" baser: 
"docker://nciccbr/ccbr_baser:230531" + +# pipeline information +pipeline: "ASPEN" +version: "$ASPENVERSION" diff --git a/workflow/Snakefile b/workflow/Snakefile index 513c169..d96d546 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -53,38 +53,35 @@ rule all: expand(join(RESULTSDIR,"peaks","genrich","{sample}.consensus.genrich.peakfiles"),sample=SAMPLES), expand(join(RESULTSDIR,"peaks","genrich","{sample}.replicate.genrich.peakfiles"),sample=SAMPLES), expand(join(RESULTSDIR,"peaks","genrich","{sample}.genrich.tn5nicksbedfiles"),sample=SAMPLES), - # roi gtf + # roi gtf expand(join(RESULTSDIR,"peaks","{peakcaller}","fixed_width","ROI.gtf"),peakcaller=PEAKCALLERS), # counts matrix expand(join(RESULTSDIR,"peaks","{peakcaller}","ROI.counts.tsv"),peakcaller=PEAKCALLERS), # diffatac expand(join(RESULTSDIR, "peaks", "{peakcaller}", "DiffATAC", "all_diff_atacs.tsv"),peakcaller=PEAKCALLERS) if CONTRASTS.shape[0] > 0 else [], - -# create jobby tables -jobby_cmd = 'run_jobby_on_snakemake_log snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short' -spook_cmd = f'spooker {WORKDIR} ASPEN' +on_complete = f""" +for cmd in spooker run_jobby_on_snakemake_log; do + if ! 
command -v "$cmd" >/dev/null 2>&1; then + export PATH="$PATH:{config['ccbr_tools_path']}" + fi +done +run_jobby_on_snakemake_log logs/snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short +spooker {WORKDIR} {config['pipeline']} {config['version']} +""" onsuccess: #subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True) print("OnSuccess") shell("printenv") shell("module list") - print(jobby_cmd) - print("The above command may fail if run_jobby_on_snakemake_log is not in PATH!") - shell(jobby_cmd) - print(spook_cmd) - shell(spook_cmd) - print("The above command may fail if spooker is not in PATH or not running on BIOWULF/FRCE!") + print(on_complete) + shell(on_complete) onerror: #subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True) print("OnError") shell("printenv") shell("module list") - print(jobby_cmd) - shell(jobby_cmd) - print("The above command may fail if run_jobby_on_snakemake_log is not in PATH!") - print(spook_cmd) - shell(spook_cmd) - print("The above command may fail if spooker is not in PATH or not running on BIOWULF/FRCE!") + print(on_complete) + shell(on_complete)