-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathgetSpecificSequences.py
More file actions
127 lines (100 loc) · 3.21 KB
/
getSpecificSequences.py
File metadata and controls
127 lines (100 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/lab/64/bin/python
# getSpecificSequences.py
# Author: Angela Brooks
# Program Completion Date:
# Description: Takes a list of titles and a fasta file and outputs only the
# sequences from the fasta file whose titles are in the list.
# Modification Date(s):
import sys
import optparse
import pdb
from Bio import SeqIO
#############
# CONSTANTS #
#############
#################
# END CONSTANTS #
#################
###########
# CLASSES #
###########
class OptionParser(optparse.OptionParser):
    """
    optparse.OptionParser with an added method for required arguments.

    Taken from:
    http://www.python.org/doc/2.3/lib/optparse-extending-examples.html
    """

    def check_required(self, opt):
        """
        Exit with status 1 if the option *opt* was not supplied.

        opt -- an option string registered with add_option, e.g. "-f".

        The check reads self.values, so call it after parse_args().  An
        option counts as "not supplied" when its parsed value is None,
        which is why required options must be declared with default=None.
        On failure a message and the usage help are printed before exiting.
        """
        option = self.get_option(opt)

        # Assumes the option's 'default' is set to None!
        if getattr(self.values, option.dest) is None:
            # Single-argument, parenthesised form so the line is valid (and
            # prints the same text) under both Python 2 and Python 3.
            print("%s option not supplied" % option)
            self.print_help()
            sys.exit(1)
###############
# END CLASSES #
###############
########
# MAIN #
########
def main():
    """
    Print the records of a fasta file whose ids match a list of titles.

    Command-line options:
      -f  fasta file of sequences (required)
      -t  file of titles to extract, one per line; a leading ">" on a
          title is ignored (required)
      -c  a title only has to be contained in the record id instead of
          matching it exactly

    Matching records are written to standard output in fasta format, in
    the order they occur in the fasta file, and the process then exits
    with status 0.
    """
    opt_parser = OptionParser()
    # Add Options. Required options should have default=None
    opt_parser.add_option("-f",
                          dest="fasta_file",
                          type="string",
                          help="Fasta file of sequences.",
                          default=None)
    opt_parser.add_option("-t",
                          dest="titles",
                          type="string",
                          help="Titles of sequences that you want to extract.",
                          default=None)
    opt_parser.add_option("-c",
                          dest="isContained",
                          action="store_true",
                          help="Title input just has to be contained in fasta\n"
                               "record. Does not have to be an exact\n"
                               "match.",
                          default=False)

    (options, args) = opt_parser.parse_args()

    # validate the command line arguments
    opt_parser.check_required("-f")
    opt_parser.check_required("-t")

    # Collect the wanted titles in a set for fast membership tests.
    # The file is closed as soon as it has been read.
    with open(options.titles) as title_file:
        title_set = _readTitles(title_file)

    # Stream the fasta file and echo every record whose id is wanted.
    with open(options.fasta_file) as fasta_file:
        for record in SeqIO.parse(fasta_file, "fasta"):
            if _titleMatches(record.id, title_set, options.isContained):
                # The formatted record already ends with a newline, so it
                # is written verbatim with no extra separator.
                sys.stdout.write(record.format("fasta"))

    sys.exit(0)


def _readTitles(title_file):
    """Return the set of non-empty titles in title_file, minus any leading ">"."""
    title_set = set()
    for line in title_file:
        line = formatLine(line)
        if line.startswith(">"):
            line = line[1:]  # Chomp off the >
        # Skip blank lines: an empty title is a substring of every record
        # id, so with -c it would select every sequence in the fasta file.
        if line:
            title_set.add(line)
    return title_set


def _titleMatches(title, title_set, isContained):
    """Return True if title is in title_set, or contains one of its members when isContained."""
    if isContained:
        return any(t in title for t in title_set)
    return title in title_set
############
# END_MAIN #
############
#############
# FUNCTIONS #
#############
def formatLine(line):
    """Return line with every carriage return and newline character removed."""
    for terminator in ("\r", "\n"):
        line = line.replace(terminator, "")
    return line
#################
# END FUNCTIONS #
#################
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()