-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathquickdb.bash
More file actions
executable file
·132 lines (122 loc) · 4.96 KB
/
quickdb.bash
File metadata and controls
executable file
·132 lines (122 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env bash
# Database names advertised in the usage text (one entry per name).
GENOMES=(
  "hg19"
  "hg38"
  "rn6"
  "mm10"
  "sacCer3"
  "hg19mito"
  "hg38mito"
  "mm10mito"
)
# Print the command-line synopsis to stderr.
# Globals:   GENOMES (read) - names listed after "databasename:"
# Arguments: none
# Outputs:   three lines on stderr, nothing on stdout
function print_usage() {
  # Join the array with single spaces regardless of the caller's IFS.
  local IFS=' '
  local genome_names="${GENOMES[*]}"
  {
    printf '%s\n' "USAGE: $0 databasename location(optional)"
    printf '%s\n' " databasename: ${genome_names}"
    printf '%s\n' " location: Directory to store the database (default = /rnadb)"
  } >&2
}
# Build the RNA database for one genome.
#
# Arguments:
#   $1 - database name: hg19, hg38, mm10, rn6, sacCer3, hg19mito, hg38mito or
#        mm10mito
#   $2 - directory that receives every downloaded and generated file
# Outputs:
#   Echoes $2 and progress messages to stdout; error messages go to stderr.
#   Always writes $2/genes.gtf. For the *mito names it then downloads and
#   unpacks the TRAX db archive; for the other names it fetches the gtRNAdb
#   archive and the genome sequence and runs maketrnadb.py.
# Returns:
#   1 when the name is unsupported, $2 is missing, or a download/extraction
#   step fails; 0 after a successful *mito download; otherwise the exit
#   status of maketrnadb.py.
function db_builder() {
  local db_name="${1:-}"
  local out_dir="${2:-}"
  local gtf_url="" trna_url="" trna_out="" trna_map="" genome_url=""
  local prebuilt_url=""
  local genome_is_fasta=false
  local -a pipe_status=()

  echo "${out_dir}"
  if [[ -z "${out_dir}" ]]; then
    echo "Could not generate RNA database, non-supported DB or parameter missing" >&2
    return 1
  fi

  # Exactly one arm per database name; the *mito names only need the GTF and
  # the TRAX db archive, so they set prebuilt_url instead of the gtRNAdb and
  # genome settings.
  case "${db_name}" in
    hg19)
      gtf_url="ftp://ftp.ensembl.org/pub/grch37/current/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz"
      trna_url="http://gtrnadb.ucsc.edu/genomes/eukaryota/Hsapi19/hg19-tRNAs.tar.gz"
      trna_out="hg19-tRNAs-detailed.out"
      trna_map="hg19-tRNAs_name_map.txt"
      genome_url="http://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz"
      genome_is_fasta=true
      ;;
    hg38)
      gtf_url="ftp://ftp.ensembl.org/pub/release-102/gtf/homo_sapiens/Homo_sapiens.GRCh38.102.gtf.gz"
      trna_url="http://gtrnadb.ucsc.edu/GtRNAdb2/genomes/eukaryota/Hsapi38/hg38-tRNAs.tar.gz"
      trna_out="hg38-tRNAs-detailed.out"
      trna_map="hg38-tRNAs_name_map.txt"
      genome_url="http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz"
      genome_is_fasta=true
      ;;
    mm10)
      gtf_url="ftp://ftp.ensembl.org/pub/release-102/gtf/mus_musculus/Mus_musculus.GRCm38.102.gtf.gz"
      trna_url="http://gtrnadb.ucsc.edu/genomes/eukaryota/Mmusc10/mm10-tRNAs.tar.gz"
      trna_out="mm10-tRNAs-confidence-set.out"
      trna_map="mm10-tRNAs_name_map.txt"
      genome_url="https://hgdownload.soe.ucsc.edu/goldenPath/mm10/bigZips/mm10.2bit"
      ;;
    rn6)
      gtf_url="ftp://ftp.ensembl.org/pub/release-102/gtf/rattus_norvegicus/Rattus_norvegicus.Rnor_6.0.102.gtf.gz"
      trna_url="http://gtrnadb.ucsc.edu/genomes/eukaryota/Rnorv6/rn6-tRNAs.tar.gz"
      trna_out="rn6-tRNAs-detailed.out"
      trna_map="rn6-tRNAs_name_map.txt"
      genome_url="https://hgdownload.soe.ucsc.edu/goldenPath/rn6/bigZips/rn6.fa.gz"
      genome_is_fasta=true
      ;;
    sacCer3)
      gtf_url="ftp://ftp.ensembl.org/pub/release-97/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.97.gtf.gz"
      trna_url="http://gtrnadb.ucsc.edu/genomes/eukaryota/Scere3/sacCer3-tRNAs.tar.gz"
      trna_out="sacCer3-tRNAs.out-noChrM"
      trna_map="sacCer3-tRNAs_name_map.txt"
      genome_url="https://hgdownload.soe.ucsc.edu/goldenPath/sacCer3/bigZips/sacCer3.2bit"
      ;;
    hg19mito)
      gtf_url="ftp://ftp.ensembl.org/pub/grch37/current/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz"
      prebuilt_url="http://trna.ucsc.edu/tRAX/data/refdb/hg19mito.tar.gz"
      ;;
    hg38mito)
      gtf_url="ftp://ftp.ensembl.org/pub/release-102/gtf/homo_sapiens/Homo_sapiens.GRCh38.102.gtf.gz"
      prebuilt_url="http://trna.ucsc.edu/tRAX/data/refdb/hg38mito.tar.gz"
      ;;
    mm10mito)
      gtf_url="ftp://ftp.ensembl.org/pub/release-102/gtf/mus_musculus/Mus_musculus.GRCm38.102.gtf.gz"
      prebuilt_url="http://trna.ucsc.edu/tRAX/data/refdb/mm10mito.tar.gz"
      ;;
    *)
      echo "Could not generate RNA database, non-supported DB or parameter missing" >&2
      return 1
      ;;
  esac

  # GTF File from Ensembl: drop header lines, prefix every record with "chr",
  # rename chrMT to chrM and keep only records mentioning the listed RNA types.
  echo "Generating GTF"
  wget -q -O - "${gtf_url}" |
    gzip -cd |
    grep -v '^#' |
    awk '{print "chr" $0;}' |
    sed 's/chrMT/chrM/g' |
    grep -e Mt_rRNA -e Mt_tRNA -e miRNA -e misc_RNA -e rRNA -e snRNA -e snoRNA -e ribozyme -e sRNA -e scaRNA \
      >"${out_dir}/genes.gtf"
  # Only the download and decompression stages decide success; the filters
  # downstream are allowed to report "no match".
  pipe_status=("${PIPESTATUS[@]}")
  if ((pipe_status[0] != 0 || pipe_status[1] != 0)); then
    echo "Failed to download or decompress ${gtf_url}" >&2
    return 1
  fi
  echo "Generating GTF Done"

  if [[ -n "${prebuilt_url}" ]]; then
    echo "Downloading TRAX db"
    wget -O "${out_dir}/mitodb.tar.gz" "${prebuilt_url}" || {
      echo "Failed to download ${prebuilt_url}" >&2
      return 1
    }
    tar zxf "${out_dir}/mitodb.tar.gz" -C "${out_dir}" || {
      echo "Failed to extract ${out_dir}/mitodb.tar.gz" >&2
      return 1
    }
    # return (not exit) so a shell that sourced this file survives the call.
    return 0
  fi

  # gtRNAdb Files
  echo "Generating gtRNAdb"
  wget -q -O "${out_dir}/tse.tar.gz" "${trna_url}" || {
    echo "Failed to download ${trna_url}" >&2
    return 1
  }
  tar zxf "${out_dir}/tse.tar.gz" -C "${out_dir}" || {
    echo "Failed to extract ${out_dir}/tse.tar.gz" >&2
    return 1
  }
  rm -- "${out_dir}/tse.tar.gz"
  echo "Generating gtRNAdb Done"

  # Genome Fasta File from UCSC; always written under $2, because that is
  # where maketrnadb.py is told to look for it below.
  echo "Generating Fasta"
  if [[ "${genome_is_fasta}" == true ]]; then
    wget -q -O - "${genome_url}" | gzip -cd >"${out_dir}/genome.fa"
    pipe_status=("${PIPESTATUS[@]}")
    if ((pipe_status[0] != 0 || pipe_status[1] != 0)); then
      echo "Failed to download or decompress ${genome_url}" >&2
      return 1
    fi
  else
    wget -q -O "${out_dir}/genome.2bit" "${genome_url}" || {
      echo "Failed to download ${genome_url}" >&2
      return 1
    }
    twoBitToFa "${out_dir}/genome.2bit" "${out_dir}/genome.fa" || {
      echo "Failed to convert ${out_dir}/genome.2bit to FASTA" >&2
      return 1
    }
  fi
  echo "Generating Fasta Done"

  # TRAX maketrnadb
  echo "Starting TRAX makernadb"
  maketrnadb.py \
    --databasename="${out_dir}/db" \
    --genomefile="${out_dir}/genome.fa" \
    --trnascanfile="${out_dir}/${trna_out}" \
    --namemapfile="${out_dir}/${trna_map}"
}
# Entry point: $1 is the database name (required), $2 the output directory
# (optional, defaults to /rnadb).
DB_LOCATION="${2:-/rnadb}"
if [[ -z "${1:-}" ]]; then
  print_usage
else
  # Create the target directory for the default location as well as for a
  # user-supplied one, so the first download has somewhere to write.
  mkdir -p -- "${DB_LOCATION}" || exit 1
  db_builder "${1}" "${DB_LOCATION}"
fi