-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmacse2.sh
More file actions
59 lines (49 loc) · 2.49 KB
/
macse2.sh
File metadata and controls
59 lines (49 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
#SBATCH -D /home/dmvelasc/Projects/Prunus/Data/fasta/
#SBATCH -o /home/dmvelasc/Projects/Prunus/slurm-log/%A_%a-stdout-macse2.txt
#SBATCH -e /home/dmvelasc/Projects/Prunus/slurm-log/%A_%a-stderr-macse2.txt
#SBATCH -J macse2
#SBATCH -a 14427
#SBATCH -p bigmemm
#SBATCH -n 1
#SBATCH -c 2
#SBATCH --mem=16G
#SBATCH -t 8-00:00:00
#SBATCH --exclude=bigmem1
#SBATCH --mail-user=dmvelasco@ucdavis.edu
#SBATCH --mail-type=ALL
set -e
set -u
# number of total array jobs
# -a 1-27864
# Load modules
module load java
# Declare number variables
x=$SLURM_ARRAY_TASK_ID
i=$(( x-1 ))
# Declare directories
macse="/home/dmvelasc/Software/macse2/macse_v2.01.jar" # macsev2 program
ref="/home/dmvelasc/Data/references/persica-SCF" # reference directory
scratch="/scratch/dmvelasc/fasta-pep" # scratch directory
dna_dir="/home/dmvelasc/Projects/Prunus/Data/fasta/fasta-concat" # directory of CDS fasta dna sequences
dna_aln="/home/dmvelasc/Projects/Prunus/Data/fasta/fasta-macse2" # directory of CDS aligned fasta backtranslated dna sequences
dna_aln_final="/home/dmvelasc/Projects/Prunus/Data/fasta/fasta-nostop" # directory of CDS aligned fasta backtranslated dna sequences without stops
pep_dir="/home/dmvelasc/Projects/Prunus/Data/fasta/fasta-pep" # amino acid alignment directory
####################
### Begin script ###
####################
# multi-sequence alignment with MACSE v2
# translates nucleotide sequence to amino acids and simultaneously aligns both
# outputs both alignments
#### STEP 1: CAPTURE GENE ID
mapfile -s "$i" -n 1 -t gene < "$ref"/Prunus_persica_v1.0_genes_list.gff3
echo -e "begin MACSE v2 alignment and export for CDS sequence ${gene[0]}"
date
#### STEP 2: ALIGN MULTI-SEQUENCE DNA FASTA
# sequence alignment of nucleotides and translated amino acid sequences
java -Xmx16G -jar "$macse" -prog alignSequences -seq "$dna_dir"/"${gene[0]}"_cds.fa -out_NT "$dna_aln"/"${gene[0]}"_cds_aln.fasta -out_AA "$pep_dir"/"${gene[0]}"_cds_pep_aln.fasta
#### STEP 3: EXPORT NUCLEOTIDE ALIGNMENT FOR BUSTED
# modify alignment to insert gaps at frameshifts and stop codons for use with HyPhy's BUSTED
java -Xmx16G -jar "$macse" -prog exportAlignment -align "$dna_aln"/"${gene[0]}"_cds_aln.fasta -charForRemainingFS - -codonForExternalFS --- -codonForFinalStop --- -codonForInternalFS --- -codonForInternalStop --- -out_NT "$dna_aln_final"/"${gene[0]}"_cds_aln_nostop.fasta -out_stat_per_site "$dna_aln_final"/"${gene[0]}"_cds_aln_stats.csv
echo -e "end MACSE v2 alignment and export for CDS sequence ${gene[0]}"
date