-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcpDNA_mapping.sh
More file actions
52 lines (47 loc) · 2.11 KB
/
cpDNA_mapping.sh
File metadata and controls
52 lines (47 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#(1) BWA mapping of filtered reads to cpDNA reference (use filtered reads without duplicates from HybPhyloMaker, copy them to a single folder)
#(2) Consensus call with kindel
#(3) Combine sequences to single FASTA file
#Requirements:
#BWA
#samtools
#kindel v.0.1.4 (pip3 install 'kindel==0.1.4')
#USAGE: ./cpDNA_mapping.sh
#------------------------------------------------------------------------------------
#Tomas Fer, 2018
#Department of Botany, Charles University, Prague, Czech Republic
#tomas.fer@natur.cuni.cz
#------------------------------------------------------------------------------------
#SETTINGS
reference=cpDNA.fas
mincov=2
majthres=0.9
#Index reference
bwa index $reference #index reference
ls *-1P_no-dups* | cut -d"-" -f1,2 > list.txt
for file in $(cat list.txt); do
echo -e "Processing $file"
echo -e "...mapping"
bwa mem $reference ${file}-1P_no-dups.fastq.gz ${file}-2P_no-dups.fastq.gz > ${file}.sam #mapping
echo -e "...converting"
samtools view -bS -o ${file}.bam ${file}.sam #convert to bam
nrmapped=$(samtools view -F 0x04 -c ${file}.bam) #number of mapped reads in BAM
nrall=$(samtools view -c ${file}.bam) #number of all reads in BAM
percmapped=$(echo -e "scale=5;100 * ($nrmapped / $nrall)" | bc) #calculate percentage of mapped reads
echo -e "$file\t$nrall\t$nrmapped\t$percmapped" >> mapping_summary.txt
echo -e "...sorting"
samtools sort ${file}.bam ${file}_sorted
echo -e "...indexing"
samtools index ${file}_sorted.bam
kindel -m $mincov -t $majthres ${file}_sorted.bam > ${file}.fasta
#change name in fasta file
sed -i '1d' ${file}.fasta #delete first line
echo ">$file" > header.txt
cat header.txt ${file}.fasta > tmp && mv tmp ${file}.fasta
rm header.txt ${file}.sam ${file}.bam #cleaning
#Remove line breaks from fasta file
awk '!/^>/ { printf "%s", $0; n = "\n" } /^>/ { print n $0; n = "" } END { printf "%s", n }' ${file}.fasta > tmp && mv tmp ${file}.fasta
done
#combine fasta files
cat *.fasta > consensus.fasta
#Remove line breaks from fasta file
awk '!/^>/ { printf "%s", $0; n = "\n" } /^>/ { print n $0; n = "" } END { printf "%s", n }' consensus.fasta > tmp && mv tmp consensus.fasta