-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfasta.py
More file actions
103 lines (84 loc) · 2.96 KB
/
fasta.py
File metadata and controls
103 lines (84 loc) · 2.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
class GenericFile(object):
    """
    Reads the complete contents of a file into ``self.fileS``.
    """

    def __init__(self, fileNameS):
        """
        :param fileNameS: either a path that ``open()`` accepts, or an
            already-open object exposing a ``read()`` method.
        :raises IOError/OSError: if ``fileNameS`` is a path that cannot be
            opened or read. The error from ``open()`` is propagated as-is
            instead of being masked by a bare ``except``.
        :return: does not return anything.
        """
        if hasattr(fileNameS, "read"):
            # File-like object: read it directly. The caller owns the handle,
            # so it is deliberately not closed here.
            self.fileS = fileNameS.read()
        else:
            # Path: the context manager closes the handle even when read()
            # raises.
            with open(fileNameS) as fileF:
                self.fileS = fileF.read()
class FastaFile(GenericFile):
    """
    This class represents a FastaFile.

    The raw text is stored in ``self.fileS``; the parsed records are cached
    as a list of ``Sequence`` objects in ``self.sequences``.
    """

    def __init__(self, fasta, fileName=True):
        """
        :param fasta: This can be either a file name (string) or a HTML file
            upload element when ``fileName`` is True, or the fasta text itself
            when ``fileName`` is False.
        :param fileName: This is a boolean value. If False, then ``fasta`` is
            the fasta string.
        :raises ValueError: if ``fileName`` is False and ``fasta`` contains
            no ">" character.
        :raises TypeError: if ``fileName`` equals neither True nor False.
        :return: does not return anything.
        """
        # Equality (rather than identity or truthiness) is kept on purpose:
        # 1 / 0 are still accepted and any other value is still rejected.
        if fileName == True:  # noqa: E712
            super(FastaFile, self).__init__(fasta)
        elif fileName == False:  # noqa: E712
            if ">" not in fasta:
                raise ValueError("It is not in Fasta Format")
            self.fileS = fasta
        else:
            raise TypeError(
                "fileName must be True or False, got %r" % (fileName,)
            )
        # Reset the cache first so getSequences() actually parses the text.
        self.sequences = None
        self.sequences = self.getSequences()

    def __getitem__(self, item):
        """Return the parsed record(s) at ``item`` (index or slice)."""
        return self.getSequences()[item]

    def __len__(self):
        """Return the number of parsed records."""
        return len(self.getSequences())

    def __repr__(self):
        return "< Fasta file: %s sequences >" % len(self)

    # str() and repr() intentionally produce the same text.
    __str__ = __repr__

    def getSequences(self):
        """
        Parse ``self.fileS`` into ``Sequence`` objects (once) and return them.

        The text is split on ">"; the first line of each piece is the record
        name and the remaining lines are joined, upper-cased and stripped of
        spaces and tabs to form the sequence.

        :return: the cached list of ``Sequence`` objects.
        """
        if self.sequences is None:
            sequences = []
            # Whatever precedes the first ">" is not a record, hence [1:].
            for record in self.fileS.strip().split(">")[1:]:
                lines = record.splitlines()
                if not lines:
                    # Empty record (e.g. ">>" or a trailing ">"): there is no
                    # header line to read, so skip it instead of raising
                    # IndexError.
                    continue
                header = lines[0]
                sequence = "".join(lines[1:]).upper()
                sequence = sequence.replace(" ", "").replace("\t", "")
                sequences.append(Sequence(name=header, sequence=sequence))
            self.sequences = sequences
        return self.sequences

    def getMaxLength(self):
        """
        This returns the max length of a sequence in this object
        (0 when there are no sequences).
        """
        lengths = [len(sequence) for sequence in self.getSequences()]
        # "or [0]" keeps the empty case at 0 without relying on max(default=).
        return max(lengths or [0])

    def areDuplicatesPresent(self):
        """
        Return True when at least two records share the same sequence string
        (record names are not compared), False otherwise.
        """
        sequenceL = [i.sequence for i in self.getSequences()]
        return len(sequenceL) > len(set(sequenceL))
class Sequence(object):
    """
    A named DNA/RNA sequence.

    ``name`` is the record name and ``sequence`` the sequence string.
    Indexing and ``len()`` are delegated to the sequence string.
    """

    def __init__(self, name, sequence):
        self.sequence = sequence
        self.name = name

    def __len__(self):
        """Length of the sequence string."""
        return len(self.sequence)

    def __getitem__(self, item):
        """Index or slice into the sequence string."""
        return self.sequence[item]

    def __repr__(self):
        fields = (self.name, self.sequence)
        return "%s : %s" % fields

    def __str__(self):
        # Same layout as repr(), but with a trailing space.
        fields = (self.name, self.sequence)
        return "%s : %s " % fields