Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions aspen
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kelly-sovacool I did not know about the `envsubst` command... this is cool!

Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ EOF
##########################################################################################

# ## setting PIPELINE_HOME
PIPELINE_HOME=$(readlink -f $(dirname "$0"))
export PIPELINE_HOME=$(readlink -f $(dirname "$0"))

# set snakefile
SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile"
Expand All @@ -58,10 +58,10 @@ PYTHONVERSION="python/3.10"
SNAKEMAKEVERSION="snakemake"
#SINGULARITYVERSION="singularity/3.7.4"
SINGULARITYVERSION="singularity"
ASPENVERSION=$(head -n1 $VERSIONFILE|awk '{print $1}')
export ASPENVERSION=$(head -n1 $VERSIONFILE|awk '{print $1}')

# set defaults
GENOME="hg38"
export GENOME="hg38"
SUPPORTED_GENOMES="hg19 hg38 mm10 mmul10 bosTau9 hs1"

# essential files
Expand Down Expand Up @@ -193,9 +193,7 @@ mkdir -p $WORKDIR
f="${PIPELINE_HOME}/config/config.yaml"
echo "Copying essential file: $f"
fbn=$(basename $f)
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \
-e "s/WORKDIR/${WORKDIR//\//\\/}/g" \
-e "s/GENOME/${GENOME}/g" $f > $WORKDIR/$fbn
cat $f | envsubst '$PIPELINE_HOME $WORKDIR $GENOME $ASPENVERSION' > $WORKDIR/$fbn

for f in ${PIPELINE_HOME}/resources/cluster.json ${PIPELINE_HOME}/resources/tools.yaml
do
Expand All @@ -215,9 +213,7 @@ if [[ "$MANIFEST_SUPPLIED" == "false" ]];then
f=$MANIFEST
echo "Copying essential file: $f"
fbn=$(basename $f)
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \
-e "s/WORKDIR/${WORKDIR//\//\\/}/g" \
-e "s/GENOME/${GENOME}/g" $f > $WORKDIR/$fbn
cat $f | envsubst '$PIPELINE_HOME $WORKDIR $GENOME $ASPENVERSION' > $WORKDIR/$fbn
fi

# copy essential folders
Expand Down Expand Up @@ -296,10 +292,9 @@ function reconfig(){
# this is only for dev purposes when new key-value pairs are being added to the config file

check_essential_files
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \
-e "s/WORKDIR/${WORKDIR//\//\\/}/g" \
-e "s/GENOME/${GENOME}/g" \
${PIPELINE_HOME}/config/config.yaml > $WORKDIR/config.yaml
cat ${PIPELINE_HOME}/config/config.yaml |\
envsubst '$PIPELINE_HOME $WORKDIR $GENOME $ASPENVERSION' \
> $WORKDIR/config.yaml
echo "$WORKDIR/config.yaml has been updated!"

}
Expand Down Expand Up @@ -716,7 +711,7 @@ function main(){
if [ ! -f $MANIFEST ];then err "File $MANIFEST does NOT exist!";fi
;;
-g=*|--genome=*)
GENOME="${i#*=}"
export GENOME="${i#*=}"
found=0
for g in $SUPPORTED_GENOMES;do
if [[ "$GENOME" == "$g" ]];then
Expand All @@ -739,7 +734,7 @@ function main(){
;;
esac
done
WORKDIR=$(readlink -f "$WORKDIR")
export WORKDIR=$(readlink -f "$WORKDIR")
MANIFEST_SUPPLIED="true"
# if manifest is empty ... aka not supplied at cli
if [[ -z $MANIFEST ]];then
Expand Down
102 changes: 53 additions & 49 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
## You will probably need to change, comment out, or uncomment some of these settings
#
# The working dir... output will be in the results subfolder of the workdir
workdir: "WORKDIR"
workdir: "$WORKDIR"

# tools scriptsdir resourcesdir
# to use the workdir version of tools.yaml comment out the following line
tools: "PIPELINE_HOME/resources/tools.yaml"
tools: "$PIPELINE_HOME/resources/tools.yaml"

# to use the workdir version of cluster.json comment out the following line
# clusterjson: "PIPELINE_HOME/resources/cluster.json"
# clusterjson: "$PIPELINE_HOME/resources/cluster.json"

# to use the workdir version of scriptsdir comment out the following line
# scriptsdir: "PIPELINE_HOME/workflow/scripts"
scriptsdir: "WORKDIR/scripts"
# scriptsdir: "$PIPELINE_HOME/workflow/scripts"
scriptsdir: "$WORKDIR/scripts"

resourcesdir: "PIPELINE_HOME/resources"
resourcesdir: "$PIPELINE_HOME/resources"

# tab delimited samples file ... should have the following 4 columns
#
Expand All @@ -23,10 +23,10 @@ resourcesdir: "PIPELINE_HOME/resources"
# multiple replicates may belong to the same sample
# PE data is required!
#
samplemanifest: "WORKDIR/samples.tsv"
samplemanifest: "$WORKDIR/samples.tsv"

# uncomment the genome of interest
genome: "GENOME"
genome: "$GENOME"
# genome: "hg38"
# genome: "hg19"
# genome: "mm10"
Expand All @@ -43,79 +43,79 @@ multimapping: 4
fixed_width: 500

# contrasts info
contrasts: "WORKDIR/contrasts.tsv"
contrasts: "$WORKDIR/contrasts.tsv"
contrasts_fc_cutoff: 2
contrasts_fdr_cutoff: 0.05

# genome specific resource locations on biowulf

hs1:
indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hs1/indexes"
blacklistFa: "PIPELINE_HOME/resources/blacklistFa/hs1.blacklist.fa.gz"
tssBed: "PIPELINE_HOME/resources/tssBed/hs1_tssbeds.tar.gz"
homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif"
mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz"
blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/hs1.blacklist.fa.gz"
tssBed: "$PIPELINE_HOME/resources/tssBed/hs1_tssbeds.tar.gz"
homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif"
mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz"
effectiveGenomeSize: 3000000000
fripextra:
dhsbed: "PIPELINE_HOME/resources/frip/hs1.DHS.bed.gz"
promoterbed: "PIPELINE_HOME/resources/frip/hs1.promoters.bed.gz"
enhancerbed: "PIPELINE_HOME/resources/frip/hs1.enhancers.bed.gz"
dhsbed: "$PIPELINE_HOME/resources/frip/hs1.DHS.bed.gz"
promoterbed: "$PIPELINE_HOME/resources/frip/hs1.promoters.bed.gz"
enhancerbed: "$PIPELINE_HOME/resources/frip/hs1.enhancers.bed.gz"

hg38:
indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg38_basic/indexes"
blacklistFa: "PIPELINE_HOME/resources/blacklistFa/hg38.blacklist.fa.gz"
tssBed: "PIPELINE_HOME/resources/tssBed/hg38_tssbeds.tar.gz"
homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif"
mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz"
blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/hg38.blacklist.fa.gz"
tssBed: "$PIPELINE_HOME/resources/tssBed/hg38_tssbeds.tar.gz"
homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif"
mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz"
effectiveGenomeSize: 2700000000
fripextra:
dhsbed: "PIPELINE_HOME/resources/frip/hg38.DHS.bed.gz"
promoterbed: "PIPELINE_HOME/resources/frip/hg38.promoters.bed.gz"
enhancerbed: "PIPELINE_HOME/resources/frip/hg38.enhancers.bed.gz"
dhsbed: "$PIPELINE_HOME/resources/frip/hg38.DHS.bed.gz"
promoterbed: "$PIPELINE_HOME/resources/frip/hg38.promoters.bed.gz"
enhancerbed: "$PIPELINE_HOME/resources/frip/hg38.enhancers.bed.gz"

hg19:
indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/hg19_basic/indexes"
blacklistFa: "PIPELINE_HOME/resources/blacklistFa/hg19.blacklist.fa.gz"
tssBed: "PIPELINE_HOME/resources/tssBed/hg19_tssbeds.tar.gz"
homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif"
mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz"
blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/hg19.blacklist.fa.gz"
tssBed: "$PIPELINE_HOME/resources/tssBed/hg19_tssbeds.tar.gz"
homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_mono_homer_format_0.001.motif"
mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_mono_meme_format.tar.gz"
effectiveGenomeSize: 2700000000
fripextra:
dhsbed: "PIPELINE_HOME/resources/frip/hg19.DHS.bed.gz"
promoterbed: "PIPELINE_HOME/resources/frip/hg19.promoters.bed.gz"
enhancerbed: "PIPELINE_HOME/resources/frip/hg19.enhancers.bed.gz"
dhsbed: "$PIPELINE_HOME/resources/frip/hg19.DHS.bed.gz"
promoterbed: "$PIPELINE_HOME/resources/frip/hg19.promoters.bed.gz"
enhancerbed: "$PIPELINE_HOME/resources/frip/hg19.enhancers.bed.gz"

mm10:
indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mm10_basic/indexes"
blacklistFa: "PIPELINE_HOME/resources/blacklistFa/mm10.blacklist.fa.gz"
tssBed: "PIPELINE_HOME/resources/tssBed/mm10_tssbeds.tar.gz"
homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_MOUSE_mono_homer_format_0.001.motif"
mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_MOUSE_mono_meme_format.tar.gz"
blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/mm10.blacklist.fa.gz"
tssBed: "$PIPELINE_HOME/resources/tssBed/mm10_tssbeds.tar.gz"
homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_MOUSE_mono_homer_format_0.001.motif"
mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_MOUSE_mono_meme_format.tar.gz"
effectiveGenomeSize: 1870000000
fripextra:
dhsbed: "PIPELINE_HOME/resources/frip/mm10.DHS.bed.gz"
promoterbed: "PIPELINE_HOME/resources/frip/mm10.promoters.bed.gz"
enhancerbed: "PIPELINE_HOME/resources/frip/mm10.enhancers.bed.gz"
dhsbed: "$PIPELINE_HOME/resources/frip/mm10.DHS.bed.gz"
promoterbed: "$PIPELINE_HOME/resources/frip/mm10.promoters.bed.gz"
enhancerbed: "$PIPELINE_HOME/resources/frip/mm10.enhancers.bed.gz"

mmul10:
indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/mmul10"
blacklistFa: "PIPELINE_HOME/resources/blacklistFa/mmul10.blacklist.fa.gz"
tssBed: "PIPELINE_HOME/resources/tssBed/mmul10_v108_tssbeds.tar.gz"
homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif"
mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz"
blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/mmul10.blacklist.fa.gz"
tssBed: "$PIPELINE_HOME/resources/tssBed/mmul10_v108_tssbeds.tar.gz"
homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif"
mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz"
effectiveGenomeSize: 2000000000
fripextra:
promoterbed: "PIPELINE_HOME/resources/frip/mmul10.promoters.bed.gz"
promoterbed: "$PIPELINE_HOME/resources/frip/mmul10.promoters.bed.gz"

bosTau9:
indexdir: "/data/CCBR_Pipeliner/db/PipeDB/Indices/bosTau9"
blacklistFa: "PIPELINE_HOME/resources/blacklistFa/bosTau9.blacklist.fa.gz"
tssBed: "PIPELINE_HOME/resources/tssBed/bosTau9_v108_tssbeds.tar.gz"
homermotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif"
mememotif: "PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz"
blacklistFa: "$PIPELINE_HOME/resources/blacklistFa/bosTau9.blacklist.fa.gz"
tssBed: "$PIPELINE_HOME/resources/tssBed/bosTau9_v108_tssbeds.tar.gz"
homermotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_full_HUMAN_MOUSE_mono_homer_format_0.001.motif"
mememotif: "$PIPELINE_HOME/resources/motif/HOCOMOCOv11_core_HUMAN_MOUSE_mono_meme_format.tar.gz"
effectiveGenomeSize: 2000000000
fripextra:
promoterbed: "PIPELINE_HOME/resources/frip/bosTau9.promoters.bed.gz"
promoterbed: "$PIPELINE_HOME/resources/frip/bosTau9.promoters.bed.gz"

# MACS2 arguments/parameters for peak calling
# annotatePeaks: True ensures annotation of called peaks using ChIPSeeker
Expand Down Expand Up @@ -154,13 +154,13 @@ roi_min_spm: 2
# report customized using multiqc_atacseq_config.yaml
multiqc:
extraparams: ""
configfile: "PIPELINE_HOME/config/multiqc_atacseq_config.yaml"
configfile: "$PIPELINE_HOME/config/multiqc_atacseq_config.yaml"

# set min peaks to use for jaccard calculations
jaccard_min_peaks: 1000

# fastq screen configuration file
fastqscreen_config: "PIPELINE_HOME/config/fastq_screen_config.txt"
fastqscreen_config: "$PIPELINE_HOME/config/fastq_screen_config.txt"

# URLs for containers
# masterdocker: "docker://nciccbr/ccbr_atacseq:v0.1.29"
Expand All @@ -170,3 +170,7 @@ fastqscreendocker: "docker://nciccbr/ccbr_fastq_screen_0.14.1:v1.0"
# featurecounts: "docker://genomicpariscentre/featurecounts:1.5.3"
featurecountsdocker: "docker://dsaha0295/featurecounts:latest"
baser: "docker://nciccbr/ccbr_baser:230531"

# pipeline information
pipeline: "ASPEN"
version: "$ASPENVERSION"
31 changes: 14 additions & 17 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,38 +53,35 @@ rule all:
expand(join(RESULTSDIR,"peaks","genrich","{sample}.consensus.genrich.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","{sample}.replicate.genrich.peakfiles"),sample=SAMPLES),
expand(join(RESULTSDIR,"peaks","genrich","{sample}.genrich.tn5nicksbedfiles"),sample=SAMPLES),
# roi gtf
# roi gtf
expand(join(RESULTSDIR,"peaks","{peakcaller}","fixed_width","ROI.gtf"),peakcaller=PEAKCALLERS),
# counts matrix
expand(join(RESULTSDIR,"peaks","{peakcaller}","ROI.counts.tsv"),peakcaller=PEAKCALLERS),
# diffatac
expand(join(RESULTSDIR, "peaks", "{peakcaller}", "DiffATAC", "all_diff_atacs.tsv"),peakcaller=PEAKCALLERS) if CONTRASTS.shape[0] > 0 else [],


# create jobby tables
jobby_cmd = 'run_jobby_on_snakemake_log snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short'
spook_cmd = f'spooker {WORKDIR} ASPEN'
# Shell snippet executed by both the onsuccess and onerror handlers.
# 1. For each helper tool (spooker, run_jobby_on_snakemake_log) that is not
#    already resolvable, append config['ccbr_tools_path'] to PATH.
# 2. Summarise logs/snakemake.log with run_jobby_on_snakemake_log, keeping the
#    full table in logs/snakemake.log.jobby and columns 2,3,18 in the .short file.
# 3. Call spooker with the workdir, pipeline name and pipeline version from config.
# Note: `>/dev/null 2>&1` (in that order) silences both stdout and stderr of
# `command -v`; the reverse order would leave stderr attached to the terminal.
on_complete = f"""
for cmd in spooker run_jobby_on_snakemake_log; do
if ! command -v "$cmd" >/dev/null 2>&1; then
export PATH="$PATH:{config['ccbr_tools_path']}"
fi
done
run_jobby_on_snakemake_log logs/snakemake.log | tee logs/snakemake.log.jobby | cut -f2,3,18 > logs/snakemake.log.jobby.short
spooker {WORKDIR} {config['pipeline']} {config['version']}
"""

onsuccess:
#subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True)
print("OnSuccess")
shell("printenv")
shell("module list")
print(jobby_cmd)
print("The above command may fail if run_jobby_on_snakemake_log is not in PATH!")
shell(jobby_cmd)
print(spook_cmd)
shell(spook_cmd)
print("The above command may fail if spooker is not in PATH or not running on BIOWULF/FRCE!")
print(on_complete)
shell(on_complete)

onerror:
#subprocess.run(shlex.split(jobby_cmd),capture_output=False,shell=False,text=True)
print("OnError")
shell("printenv")
shell("module list")
print(jobby_cmd)
shell(jobby_cmd)
print("The above command may fail if run_jobby_on_snakemake_log is not in PATH!")
print(spook_cmd)
shell(spook_cmd)
print("The above command may fail if spooker is not in PATH or not running on BIOWULF/FRCE!")
print(on_complete)
shell(on_complete)