-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmutect2.sh
More file actions
executable file
·106 lines (85 loc) · 3.7 KB
/
mutect2.sh
File metadata and controls
executable file
·106 lines (85 loc) · 3.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/bin/bash
#$ -cwd
##########################################################################
# Allysia Mak #
# mutect2.sh runs SplitNCigarReads for read preprocessing before running #
# Mutect2 on all bams in the 'parentdir' #
# #
# The number of bams run in parallel is specified by 'numProcesses' #
# Input to stdin: $1 = number of processes $2 = 'pon' mode or null for #
# matched normal mode #
# Change lines 21-27 to fit your file structures #
# #
# !! Make sure there is only 1 '*.bam' file per sample, if more than one #
# then change first for loop to find -name '*your.bam' #
##########################################################################
# Change parentdir to directory running on (normal or tumor bam directory)
#parentdir="/scratch/jakutagawa/RNA-seq/GTEx_bams/STAR_aligned/subset-SRA"
parentdir="/scratch/jakutagawa/RNA-seq/realigned_bams/tumor"
#parentdir="/scratch/jakutagawa/RNA-seq/realigned_bams/normal"
hs37="/pod/pstore/groups/brookslab/reference_indices/hs37/hs37d5.fa"
hg19="/pod/pstore/groups/brookslab/reference_indices/hg19/hg19.fa"
outDir="/scratch/amak/varCalls/Mutect/pon-tumor-vcfs"
pon="/pod/pstore/groups/brookslab/amak/var-prediction-testing/gtex-vcfs/GTEX_PON.vcf"
numProcesses=$1
matchedOrPON=$2
if [ $((numProcesses)) -eq 0 ]; then
echo "Max number of processes not supplied. Defaulting to 2"
numProcesses=2
fi
function maxProcesses {
# Waits until there are less than 'numProcesses' jobs running before starting a new job
while [ $(jobs | wc -l) -ge $numProcesses ]; do
#echo 'waiting'
sleep 5
done
}
##### Comment this for loop out if split bams are already made by SplitNCigarReads for all bams
for bam in $(find $parentdir -mindepth 1 -name '*.bam'); do
uid=$(basename $bam)
IFS='.'
set $uid
uid=$(echo $1)
unset IFS
bamdir=$(dirname $bam)
if [[ -z "$(find $parentdir -name $uid'.split.bam')" ]]; then
echo "SplitNCigarReads running on " $uid
maxProcesses; nice time gatk SplitNCigarReads -fixNDN -R $hs37 -I $bam -O $bamdir'/'$uid'.split.bam' &
echo $(find $parentdir -name $uid'.split.bam')
else
echo $uid ' already done'
fi
done
wait
for bam in $(find $parentdir -mindepth 1 -name '*.split.bam'); do #'*split_picard_edited.bam'); do #'*hs37d5.bam'); do
uid=$(basename $bam)
IFS='.'
set $uid
uid=$(echo $1)
unset IFS
bamdir=$(dirname $bam)
if [ ! -d "$outDir/$uid" ]; then
mkdir $outDir/$uid
echo $bam
normal_ID_line=$(samtools view -H $bam | grep '@RG')
IFS=$':\t'
set $normal_ID_line
## !!! May need to change depending on bam header !! ##
#GTEX bams, normal ID is at $9
# normal_ID=$(echo $9)
#PCAWG bams, normal ID is at $6
normal_ID=$(echo $6)
unset IFS
echo 'normal_ID ' $normal_ID
# maxProcesses; nice time gatk Mutect2 -R $hs37 -I $bamdir'/'$uid'.split.bam' -tumor $normal_ID -O $outDir'/'$uid'/'$uid'.vcf' &
## if running in no-match (tumor-only) mode, take out -pon argument
if [[ $matchedOrPON = "pon" ]]; then
maxProcesses; nice time gatk Mutect2 -R $hs37 -I $bam -tumor $normal_ID -pon $pon -O $outDir'/'$uid'/'$uid'.vcf' &
else
maxProcesses; nice time gatk Mutect2 -R $hs37 -I $bam -tumor $normal_ID -O $outDir'/'$uid'/'$uid'.vcf' &
fi
fi
# rm $tumordir'/'$uid'.split.bam'
echo $uid " complete"
done
wait