-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathfunseq2.sh
More file actions
executable file
·188 lines (167 loc) · 6.2 KB
/
funseq2.sh
File metadata and controls
executable file
·188 lines (167 loc) · 6.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/bin/bash
# ---------------------------------------------------------------------------
# Default values for every command-line option. The option parser further
# down overwrites these with whatever the user supplies.
# ---------------------------------------------------------------------------

# Inputs that have no default (left empty until set on the command line).
user_input=""
input_format=""
gene_list=""
expression=""
class=""
exp_format=""

# Variant filtering.
maf="0"
sv_length_cut="20"

# Analysis modes (numeric switches).
genome_mode="1"
nc_mode="0"
weight_mode="1"
recurdb_use="0"

# Scoring and recurrence settings.
score_cut="1.5"
cancer_type="all"

# Output and runtime settings.
output_format="vcf"
out_path="out"
parallel="5"
user_anno="data_context/user_annotations"
#conf_in="config.txt"
#######################################
# Print the help text to stdout.
# The script name shown in the text is taken from $0, so the usage line and
# the examples always match the way the script was actually invoked.
# Arguments: none
# Outputs:   the help text on stdout, with one blank line before and after
#######################################
usage() {
  # Unquoted heredoc delimiter: $0 is expanded, everything else is literal.
  cat <<EOF

FunSeq version 2.1.6, upated May 06, 2016.
bugs fixed; Now support multiple alt alleles entry for the same locus.
* Usage : $0 -f file -maf MAF -m <1/2> -len length_cut -inf <bed/vcf> -outf <bed/vcf> -nc -o path -g file -exp file -cls file -exf <rpkm/raw> -p int -cancer cancer_type -s score -uw -ua user_annotations_directory -db
The default config file is config.txt. You can also assign a new config file using command:
export FunSeqConfig=xxx
and then run: $0 [Options].
Options :
-f [Required] User Input SNVs File
-inf [Required] Input format - BED or VCF
-len [Optional] Maximum length cutoff for Indel analysis, default = 20. Set to 'inf', if no filter.
-maf [Optional] Minor Allele Frequency Threshold to filter 1KG SNVs,default = 0
-m [Optional] 1 - Somatic Genome (default); 2 - Germline or Personal Genome
-outf [Optional] Output format - BED or VCF,default is VCF
-nc [Optional] Only do non-coding analysis, no need of VAT (variant annotation tool)
-o [Optional] Output path, default is the directory 'out'
-g [Optional] gene list, only output variants associated with selected genes.
-exp [Optional] gene expression matrix
-cls [Optional] class file for samples in gene expression matrix
-exf [Optional] gene expression format - rpkm / raw
-p [Optional] Number of genomes to parallel, default = 5
-cancer [Optional] cancer type from recurrence database, default is all of the cancer type
-uw [Optional] Use unweighted scoring scheme, defalut is weighted
-s [Optional] Score threshold to call non-coding candidates, default = 1.5 for weighted scoring & default = 5 for unweighted scoring
-ua [Optional] The directory for user-specific annotations, default will be read from directory 'data_context/user_annotations'
-db [Optional] Use the recurrence database to score variants. Recurrence gets a additional score.
* Multiple Genomes with Recurrent Output
Option 1: Separate multiple files by ','
Example: $0 -f file1,file2,file3,... -maf MAF -m <1/2> -inf <bed/vcf> -outf <bed/vcf> ...
Option 2: Use the 6th column of BED file to specify samples
Example: $0 -f file -maf MAF -m <1/2> -inf bed -outf <bed/vcf> ...
NOTE: Please make sure you have sufficient memory, at least 3G.

EOF
}
## Get inputs
# With no arguments at all there is nothing to run: print the help and stop.
# "$#" is tested directly; the former `[ -e $1 ]` was an unquoted
# file-existence test that also fired whenever the first argument named an
# existing path.
if [[ $# -eq 0 ]]; then
  usage
  exit
fi

# need_value OPTION REMAINING
# Abort when OPTION is the last word on the command line, i.e. the value it
# requires is missing. REMAINING is the caller's current "$#".
# Without this check the option would silently receive an empty value.
need_value() {
  if (( $2 < 2 )); then
    echo "Option $1 requires an argument." >&2
    exit 1
  fi
}

# Set to 1 once -s is seen, so that -uw never discards an explicit threshold
# regardless of the order in which the two options are given.
score_cut_given=0

# Loop on the argument count rather than on "$1" being non-empty, so an empty
# argument in the middle of the command line does not end parsing early.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -f | --file)
      need_value "$1" "$#"; shift
      user_input=$1
      ;;
    -maf)
      need_value "$1" "$#"; shift
      maf=$1
      ;;
    -conf)
      need_value "$1" "$#"; shift
      conf_in=$1
      ;;
    -len)
      need_value "$1" "$#"; shift
      sv_length_cut=$1
      ;;
    -m | --mode)
      need_value "$1" "$#"; shift
      genome_mode=$1
      ;;
    -inf)
      need_value "$1" "$#"; shift
      input_format=$1
      ;;
    -outf)
      need_value "$1" "$#"; shift
      output_format=$1
      ;;
    -nc)
      nc_mode=1
      ;;
    -o)
      need_value "$1" "$#"; shift
      out_path=$1
      ;;
    -g)
      need_value "$1" "$#"; shift
      gene_list=$1
      ;;
    -exp)
      need_value "$1" "$#"; shift
      expression=$1
      ;;
    -cls)
      need_value "$1" "$#"; shift
      class=$1
      ;;
    -exf)
      need_value "$1" "$#"; shift
      exp_format=$1
      ;;
    -p)
      need_value "$1" "$#"; shift
      parallel=$1
      ;;
    -cancer)
      need_value "$1" "$#"; shift
      cancer_type=$1
      ;;
    -uw)
      # The matching default threshold (5) is applied after the loop.
      weight_mode=0
      ;;
    -s)
      need_value "$1" "$#"; shift
      score_cut=$1
      score_cut_given=1
      ;;
    -ua)
      need_value "$1" "$#"; shift
      user_anno=$1
      ;;
    -db)
      recurdb_use=1
      ;;
    -h | --help)
      usage
      exit
      ;;
    *)
      # Unknown words used to be dropped silently; keep going, but say so.
      echo "Warning: ignoring unrecognized argument '$1'" >&2
      ;;
  esac
  shift
done

# Unweighted scoring uses a default threshold of 5 unless -s was given.
if [[ $weight_mode == 0 && $score_cut_given == 0 ]]; then
  score_cut=5
fi
## check commands ...
# External tools the pipeline needs on PATH. snpMapper is only required when
# coding analysis is enabled (nc_mode == 0).
if [[ $nc_mode == 0 ]]; then
  NEEDED_COMMANDS=(bedtools tabix perl snpMapper TFMpvalue-sc2pv awk sed)
else
  NEEDED_COMMANDS=(bedtools tabix perl TFMpvalue-sc2pv awk sed)
fi

# Report every missing tool (not just the first one) on stderr, then stop.
missing_command=0
for cmd in "${NEEDED_COMMANDS[@]}"; do
  if ! command -v "$cmd" > /dev/null 2>&1; then
    echo "Please install ${cmd}!" >&2
    missing_command=1
  fi
done
if (( missing_command )); then
  # 255 is the status bash produced for the former `exit -1`; written out
  # explicitly so the value no longer depends on how a negative status wraps.
  exit 255
fi
#export FunSeqConfig=$conf_in
## run programs...

#######################################
# Assemble the positional arguments for code/funseq2.pl.
# Globals:
#   read:    user_input maf genome_mode input_format output_format nc_mode
#            out_path parallel cancer_type score_cut weight_mode user_anno
#            recurdb_use sv_length_cut gene_list expression class exp_format
#   written: funseq_args (array) - 14 fixed arguments, then the gene list if
#            one was given, then expression / class / expression format if
#            all three were given
# Returns:
#   0 on success; 1 when only some of expression, class and exp_format are
#   set (they must be supplied together).
#######################################
build_funseq_args() {
  # Each value is its own array element, so an empty value or one containing
  # spaces keeps its position instead of shifting the arguments after it.
  funseq_args=(
    "$user_input" "$maf" "$genome_mode" "$input_format" "$output_format"
    "$nc_mode" "$out_path" "$parallel" "$cancer_type" "$score_cut"
    "$weight_mode" "$user_anno" "$recurdb_use" "$sv_length_cut"
  )

  if [[ -n $expression || -n $class || -n $exp_format ]]; then
    if [[ -z $expression || -z $class || -z $exp_format ]]; then
      return 1
    fi
    if [[ -n $gene_list ]]; then
      funseq_args+=("$gene_list")
    fi
    funseq_args+=("$expression" "$class" "$exp_format")
  elif [[ -n $gene_list ]]; then
    funseq_args+=("$gene_list")
  fi
  return 0
}

if [[ -n $user_input && -n $maf && -n $genome_mode && -n $input_format && -n $output_format && -n $out_path ]]; then
  if build_funseq_args; then
    # Echo the command line before running it, as the script always has.
    printf '%s\n' "perl code/funseq2.pl ${funseq_args[*]}"
    perl code/funseq2.pl "${funseq_args[@]}"
  else
    # An incomplete expression option set is an error: report it on stderr
    # and fail instead of finishing with a success status.
    echo "Please input both expression , class label and expression format data" >&2
    exit 1
  fi
else
  # A required option is missing: show the help text.
  usage
fi