-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathreplace_db_seqs.py
More file actions
executable file
·55 lines (49 loc) · 2.45 KB
/
replace_db_seqs.py
File metadata and controls
executable file
·55 lines (49 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python3
from ArgditLib.ProcLog import ProcLog
from Bio import SeqIO
from Bio.Seq import Seq
import argparse
import sys
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('seq_db_path', help = 'nucleotide/protein database FASTA file path')
parser.add_argument('replace_seq_file_path', help = 'FASTA file path for replacement sequences')
parser.add_argument('output_seq_db_path', help = 'output database file path')
args = parser.parse_args()
replace_seq_record_count = 0
with open(args.replace_seq_file_path, 'r') as f:
replace_seq_records = dict()
for seq_record in SeqIO.parse(f, 'fasta'):
replace_seq_records[seq_record.description] = seq_record
replace_seq_record_count += 1
input_seq_record_count = 0
export_seq_record_count = 0
direct_export_count = 0
replace_count = 0
with open(args.output_seq_db_path, 'w') as fw:
with open(args.seq_db_path, 'r') as f:
for seq_record in SeqIO.parse(f, 'fasta'):
input_seq_record_count += 1
if seq_record.description in replace_seq_records:
SeqIO.write(replace_seq_records[seq_record.description], fw, 'fasta')
export_seq_record_count += 1
replace_count += 1
else:
SeqIO.write(seq_record, fw, 'fasta')
export_seq_record_count += 1
direct_export_count += 1
summary = list()
summary_stmt = ProcLog.create_summary_stmt(input_seq_record_count, 'read from {}'.format(args.seq_db_path))
summary.append(summary_stmt)
summary_stmt = ProcLog.create_summary_stmt(replace_seq_record_count,
'read from {}'.format(args.replace_seq_file_path), 'replacement')
summary.append(summary_stmt)
summary_stmt = ProcLog.create_summary_stmt(direct_export_count,
'directly exported to {}'.format(args.output_seq_db_path), 'input')
summary.append(summary_stmt)
summary_stmt = ProcLog.create_summary_stmt(replace_count, 'exported to {}'.format(args.output_seq_db_path),
'replacement')
summary.append(summary_stmt)
summary_stmt = ProcLog.create_summary_stmt(export_seq_record_count, 'exported in total')
summary.append(summary_stmt)
ProcLog.export_ext_summary(sys.stdout, summary)