-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathchip2quality
More file actions
executable file
·112 lines (99 loc) · 4.01 KB
/
chip2quality
File metadata and controls
executable file
·112 lines (99 loc) · 4.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/bash
#PBS -l nodes=1:ppn=4
## initialize variables with default values
PROGDIR="/home/pundhir/software/idrCode"
IDR_THRESHOLD="0.01"
PROCESSOR=1
CONFIG_FILTER="qual"
#### usage ####
usage() {
echo Program: "chip2quality (perform quality control on ChIP-seq data)"
echo Author: BRIC, University of Copenhagen, Denmark
echo Version: 1.0
echo Contact: pundhir@binf.ku.dk
echo "Usage: chip2quality -i <file> -o <dir> [OPTIONS]"
echo "Options:"
echo " -i <file> [mapped ChIP files in BAM format]"
echo " [if multiple separate them by a comma]"
echo " **OR**"
echo " [input configuration file containing bam file information]"
echo " [<id> <bam file> (id should start with qual)]"
echo " -o <dir> [output directory]"
echo "[OPTIONS]"
echo " -p <int> [number of processors (default: 1)]"
echo " -t <float> [IDR threshold (default: 0.01)]"
echo " -d <dir> [path to dependent R scripts (default: /home/pundhir/software/idrCode)]"
echo " -f <string> [filter bam files from configuration file based on input indentifier (default: qual)]"
echo " -h [help]"
echo
exit 0
}
#### parse options ####
while getopts i:o:p:t:d:f:h ARG; do
case "$ARG" in
i) BAMFILE=$OPTARG;;
o) OUTDIR=$OPTARG;;
p) PROCESSOR=$OPTARG;;
t) IDR_THRESHOLD=$OPTARG;;
d) PROGDIR=$OPTARG;;
f) CONFIG_FILTER=$OPTARG;;
h) HELP=1;;
esac
done
## usage, if necessary file and directories and not given/exist
if [ ! "$BAMFILE" -o ! "$OUTDIR" -o "$HELP" ]; then
usage
fi
## create appropriate directory structure
echo
echo -n "Create output directory structure.. "
if [ ! -d "$OUTDIR" ]; then
mkdir -p $OUTDIR
fi
echo "done"
## check if input is BAM files or configuration file containing BAM file information
INPUT=$(echo $BAMFILE | perl -ane '$_=~s/\,.*//g; print $_;')
if [ "$(samtools view -H $INPUT | wc -l)" -le 0 ]; then
## read configuration file
INPUT=$(cat $BAMFILE | perl -ane '
if($_=~/^'$CONFIG_FILTER'/) {
$file.="$F[1],";
} END {
$file=~s/\,$//g;
print "$file\n";
}'
)
BAMFILE=$INPUT
fi
###############################################
## determine number of input ChIP samples
IFS=","
BAMFILES=($BAMFILE)
BAMFILES_COUNT=${#BAMFILES[@]}
IFS=" "
# for (( i=0; i<$BAMFILES_COUNT; i++ )); do echo ${BAMFILES[$i]}; done; exit
##########################################################################
############ CALL PEAKS ON INDIVIDUAL REPLICATES
##########################################################################
<<"COMMENT1"
COMMENT1
## compute enrichment and quality measure for input ChIP-seq data
echo -n "Compute enrichment and quality measure for input ChIP-seq data.. "
if [ -e "$PROGDIR/phantompeakqualtools/run_spp.R" ]; then
for (( i=0; i<$BAMFILES_COUNT; i++ )); do
ID=$(echo ${BAMFILES[$i]} | perl -an -F'/\,/' -e '$ID=(); foreach(@F) { $_=~s/^.+\///g; $_=~s/\..+$//g; chomp($_); $ID.=$_."_"; } $ID=~s/\_$//g; print "$ID\n";' | perl -an -F'//' -e 'chomp($_); if(scalar(@F)>50) { $_=~s/\_R[0-9]+.*$//g; print "$_\n"; } else { print "$_\n"; }');
echo -n "$ID.."
#Rscript $PROGDIR/phantompeakqualtools/run_spp.R -c=$INDIR/$CHIP_ID"Rep"$i.bam -fdr=$IDR_THRESHOLD -savp -odir=$OUTDIR/quality -out=$OUTDIR/quality/quality.txt -tmpdir=$OUTDIR/quality &> $OUTDIR/logs/quality.log
samtools view -b -F 1548 -q 30 ${BAMFILES[$i]} | bamToBed -i stdin | awk 'BEGIN{S="\t";OFS="\t"}{$4="N";print $0}' | gzip -c > $OUTDIR/$ID.tagAlign.gz
Rscript $PROGDIR/phantompeakqualtools/run_spp.R -c=$OUTDIR/$ID.tagAlign.gz -fdr=$IDR_THRESHOLD -savp -odir=$OUTDIR -out=$OUTDIR/$ID.quality.txt &> $OUTDIR/$ID.quality.log
rm $OUTDIR/$ID.tagAlign.gz
done
fi
echo "done"
## perform reproducibility analysis, if more than two BAM files are provided
echo -n "Perform reproducibility analysis.. "
if [ "$BAMFILES_COUNT" -ge 2 ]; then
bam2reproducibility -i $BAMFILE -o $OUTDIR -c -p $PROCESSOR
fi
echo "done"
exit