-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcurated_alignment.py
More file actions
25 lines (21 loc) · 893 Bytes
/
curated_alignment.py
File metadata and controls
25 lines (21 loc) · 893 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Read the raw alignment from a FASTA file into a list of records.
# Each record is a dict with two keys:
#   'Species'  - the header text that follows '>' (newline removed)
#   'sequence' - all sequence lines of that record joined into one string
sequences = []
# `with` guarantees the file is closed even if parsing raises part-way.
with open("raw_data.fasta", 'r') as fh:
    entry = None  # record currently being filled; None until the first header
    # Iterate the file lazily instead of loading every line via readlines().
    for line in fh:
        line = line.replace("\n", '')
        if line.startswith('>'):
            # A header line starts a new record.
            name = line[1:]
            entry = {'Species': name, 'sequence': ''}
            sequences.append(entry)
        elif entry is None:
            # Sequence data with no header has no record to belong to; blank
            # lines before the first header are simply skipped.
            if line.strip():
                raise ValueError(
                    "raw_data.fasta: sequence data found before the first "
                    "'>' header line"
                )
        else:
            # Continuation line: append to the current record's sequence.
            entry['sequence'] = entry['sequence'] + line
# Cut-off for the sequences: the index at which each sequence is split, i.e.
# the start position of the first common unit. Every sequence is rotated so
# that this position becomes its new start.
# TODO: the original script left this value blank, which is a syntax error.
# 0 leaves every sequence unchanged -- set the real start position before use.
cutsite = 0

# Rotate each sequence around the cut site and write the records to a new
# FASTA file. `with` closes the output file even if a write fails.
with open("sequences.fasta", "w") as output:
    for seq in sequences:
        # Move the prefix [:cutsite] behind the remainder [cutsite:].
        seq['sequence'] = seq['sequence'][cutsite:] + seq['sequence'][:cutsite]
        # Written inside the loop so every record in the list is emitted:
        # header line first, then the whole sequence on a single line.
        output.write('>' + seq['Species'] + '\n' + seq['sequence'] + '\n')

# Show the rearranged records once, after all of them have been written.
print(sequences)