Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions bioinformatics/algorithms/alignment_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,5 @@ def score(self,r,c):

def prnt(self):
for r in range(len(self.s1)+1):
o = []
for c in range(len(self.s2)+1):
o.append(self.pos(r,c))
o = [self.pos(r,c) for c in range(len(self.s2)+1)]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Alignment_Graph.prnt refactored with the following changes:

print(o)
16 changes: 8 additions & 8 deletions bioinformatics/algorithms/binary_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ def search(self, first=True):
left = mid + 1
elif comp_val > 0:
right = mid
else:
if first:
if mid == self.start or self.comparer(self.emitter(mid-1)) != 0:
return mid
right = mid
elif first:
if mid == self.start or self.comparer(self.emitter(mid-1)) != 0:
return mid
else:
if mid == self.end or self.comparer(self.emitter(mid+1)) != 0:
return mid
left = mid
right = mid
elif mid == self.end or self.comparer(self.emitter(mid+1)) != 0:
return mid
else:
left = mid
Comment on lines +18 to +26
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function BinarySearch.search refactored with the following changes:

return left if self.comparer(self.emitter(left)) == 0 else None
15 changes: 7 additions & 8 deletions bioinformatics/algorithms/binary_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self,nodes,edges,root=None,val=0):
self.nodes = nodes #a dict of node name to val pairs
self.edges = edges # a dict of node name to a list of neighbours (child name, edgeval)
self.root = root #the name of the root
if self.root ==None:
if self.root is None:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function BinaryTree.__init__ refactored with the following changes:

self.root = list(set(self.edges.keys()) - set(self.nodes.keys()))[0]
self.nodes[self.root] = val

Expand All @@ -20,7 +20,7 @@ def leaves(self,node="ROOT"):

if node == "ROOT":
return self.leaves(self.root)
if node==None:
if node is None:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function BinaryTree.leaves refactored with the following changes:

return []
if self.is_leaf(node):
return [node]
Expand Down Expand Up @@ -51,10 +51,9 @@ def edgecost(self,node="ROOT"):
return self.edgecost(self.root)
if self.is_leaf(node):
return 0
else:
ln,lw = self.left(node)
rn,rw = self.right(node)
return lw+rw+self.edgecost(ln)+self.edgecost(rn)
ln,lw = self.left(node)
rn,rw = self.right(node)
return lw+rw+self.edgecost(ln)+self.edgecost(rn)
Comment on lines -54 to +56
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function BinaryTree.edgecost refactored with the following changes:


def copy(self):
return BinaryTree(copy.deepcopy(self.nodes),copy.deepcopy(self.edges),self.root,self.nodes[self.root])
Expand All @@ -74,10 +73,10 @@ def merge(self,other,fv,fe): # Can be improved a lot
weight1 = edge1[1]
for edge2 in other.edges[f]:
t2 = edge2[0]
weight2= edge2[1]
if t1==t2:
weight2= edge2[1]
final_edges[f].append((t1,fe(weight1,weight2)))

Comment on lines -77 to +79
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function BinaryTree.merge refactored with the following changes:

self.edges = final_edges
return self

Expand Down
29 changes: 9 additions & 20 deletions bioinformatics/algorithms/burrows_wheeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,11 @@ def bwt(text, s_array=None):
:param s_array: suffix array
:return: BWT of text
"""
# bwt = ""
# for i in s_array:
# bwt += text[i]
# return bwt
if s_array is None:
c_rot = [text[i:] + text[:i] for i in range(len(text))]
c_rot = sorted(c_rot)
return "".join(map(lambda x: x[-1], c_rot))
else:
output = ""
for s_i in s_array:
if s_i == 0:
output += text[-1]
else:
output += text[s_i-1]
return output
if s_array is not None:
return "".join(text[-1] if s_i == 0 else text[s_i-1] for s_i in s_array)
c_rot = [text[i:] + text[:i] for i in range(len(text))]
c_rot = sorted(c_rot)
return "".join(map(lambda x: x[-1], c_rot))
Comment on lines -11 to +15
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function bwt refactored with the following changes:

This removes the following comments ( why? ):

# for i in s_array:
#    bwt += text[i]
# bwt = ""
# return bwt



def number_letters(text):
Expand Down Expand Up @@ -104,7 +93,7 @@ def bwt_matching(first_col, last_col, pattern, ltf_mapping): # O(n^2) but can b
def bwt_matching_all(bwt, patterns):
f_col, l_col = number_letters(first_col_from_bwt(bwt)), number_letters(bwt)
ltf_mapping = last_to_first_mapping(bwt)
matches = []
for pattern in patterns:
matches.append(bwt_matching(f_col, l_col, pattern, ltf_mapping))
return matches
return [
bwt_matching(f_col, l_col, pattern, ltf_mapping)
for pattern in patterns
]
Comment on lines -107 to +99
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function bwt_matching_all refactored with the following changes:

14 changes: 7 additions & 7 deletions bioinformatics/algorithms/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@


def distance(x, y):
return sum([(a - b)**2 for a, b in zip(x, y)])**0.5
return sum((a - b)**2 for a, b in zip(x, y))**0.5
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function distance refactored with the following changes:



def dist_to_cluster(point, centres): # how close is the point to the cluster it is assigned to (i.e. the nearest centre)?
return min([distance(point, centre) for centre in centres])
return min(distance(point, centre) for centre in centres)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function dist_to_cluster refactored with the following changes:



def distortion(points, centres):
return sum([dist_to_cluster(point, centres)**2 for point in points])/len(points)
return sum(dist_to_cluster(point, centres) ** 2 for point in points) / len(
points
)
Comment on lines -14 to +16
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function distortion refactored with the following changes:



def farthest_first_clustering(points, k):
Expand All @@ -34,7 +36,7 @@ def assign_to_cluster(point, centres):


def add_vector(a, b):
if b == None:
if b is None:
Comment on lines -37 to +39
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function add_vector refactored with the following changes:

print("STOP")
return tuple(map(lambda x: x[0]+x[1], zip(a, b)))

Expand Down Expand Up @@ -72,9 +74,7 @@ def lloyd_kmeans(points, k, initializer=False):


def get_responsibilities(point, centres, beta):
h_vals = []
for centre in centres:
h_vals.append(exp(-beta*distance(point, centre)))
h_vals = [exp(-beta*distance(point, centre)) for centre in centres]
Comment on lines -75 to +77
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function get_responsibilities refactored with the following changes:

return [h_val/sum(h_vals) for h_val in h_vals]


Expand Down
8 changes: 3 additions & 5 deletions bioinformatics/algorithms/eulerian_cycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def random_cycle(start, graph: Graph):
edges = graph.edges
cycle = [start]
current = start
while(current != start or len(cycle) == 1):
while current != start or len(cycle) == 1:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function random_cycle refactored with the following changes:

nxt = edges[current].pop()
if len(edges[current]) == 0:
del edges[current]
Expand Down Expand Up @@ -44,14 +44,12 @@ def aux(subset: set, ends):
if (subset, ends) in memo:
return memo[(subset, ends)]
if len(subset) == 1:
if ends == subset.pop():
return [ends]
return None
return [ends] if ends == subset.pop() else None
for neighbour in graph.edges[ends]:
if neighbour not in subset:
continue
subpath = aux(set.difference(subset, [ends]), neighbour)
if subpath == None:
if subpath is None:
Comment on lines -47 to +52
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function hamiltonian_path refactored with the following changes:

continue
res = subpath+[ends]
memo[(subset, ends)] = res
Expand Down
13 changes: 5 additions & 8 deletions bioinformatics/algorithms/four_russians_binary_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ def fill_graph_needleman(graph,indel,offset=0,left=None,top=None):
if r==c==0:
graph.set(r,c,offset)
elif r==0:
if top==None:
if top is None:
graph.set(r,c,graph.pos(r,c-1)+indel,2)
else:
graph.set(r,c,top[c-1])
elif c==0:
if left==None:
if left is None:
Comment on lines -23 to +28
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function fill_graph_needleman refactored with the following changes:

graph.set(r,c,graph.pos(r-1,c)+indel,1)
else:
graph.set(r,c,left[r-1])
Expand All @@ -45,14 +45,11 @@ def __init__(self,chars,t,empty=False):
self.empty= empty

def random(self):
output = ""
for i in range(self.t):
output += random.choice(self.chars)
return output
return "".join(random.choice(self.chars) for _ in range(self.t))
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Sequence.random refactored with the following changes:

def objWithNum(self,i):
output = ""
v = i
for ind in range(self.t):
for _ in range(self.t):
Comment on lines -55 to +52
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Sequence.objWithNum refactored with the following changes:

output = str(self.chars[v%len(self.chars)]) +output
v //= len(self.chars)
return output
Expand Down Expand Up @@ -87,7 +84,7 @@ def __next__(self):
def accumulate_diff(diffs):
o = []
for i in diffs:
if len(o) > 0:
if o:
Comment on lines -90 to +87
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function accumulate_diff refactored with the following changes:

o.append(i+o[-1])
else:
o.append(i)
Expand Down
4 changes: 1 addition & 3 deletions bioinformatics/algorithms/hmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ def backward(self):
def __getitem__(self, key):
if key == "START":
return self.graph[0, "START"]
if key == "END":
return self.graph[len(self.x)+1, "END"]
return self.graph[key]
return self.graph[len(self.x)+1, "END"] if key == "END" else self.graph[key]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function ViterbiGraph.__getitem__ refactored with the following changes:



class HMM:
Expand Down
28 changes: 15 additions & 13 deletions bioinformatics/algorithms/middle_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,21 @@
'Z': {'A':1, 'R':2, 'N':3, 'D':4, 'C':-4, 'Q':5, 'E':5, 'G':1, 'H':3, 'I':-1, 'L':-1, 'K':2, 'M':0, 'F':-4,'P': 1, 'S':1, 'T':1,'W': -4, 'Y':-3, 'V':0, 'B':5, 'Z':6}}

def blosum62score(a,b):
if (a,b) in blosum62:
return blosum62[(a,b)]
return blosum62[(b,a)]
return blosum62[(a,b)] if (a,b) in blosum62 else blosum62[(b,a)]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function blosum62score refactored with the following changes:


def prefix(s1,s2,col):
height = len(s1)
c1 = [i*indel for i in range(0,height+1)]
for i in range(1,col+1):
for _ in range(1,col+1):
c2 = [c1[0]+indel]
for row in range(1,height+1):
c2.append(max(c2[row-1]+indel,c1[row]+indel,c1[row-1]+blosum62score(s1[row-1],s2[col-1])))
c2.extend(
max(
c2[row - 1] + indel,
c1[row] + indel,
c1[row - 1] + blosum62score(s1[row - 1], s2[col - 1]),
)
for row in range(1, height + 1)
)
Comment on lines -107 to +114
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function prefix refactored with the following changes:

c1 = c2
return c1

Expand Down Expand Up @@ -147,13 +151,11 @@ def middleEdge(s1,s2):
print(middleNode(s1,s2))

def rosalind(filei,filej,func):
f = open(filei,"r")
w1 = f.readline().rstrip('\n')
w2 = f.readline().rstrip('\n')
f.close()
with open(filei,"r") as f:
w1 = f.readline().rstrip('\n')
w2 = f.readline().rstrip('\n')
o = func(w1,w2)
f = open(filej,"w")
f.writelines(str(o))
f.close()
with open(filej,"w") as f:
f.writelines(str(o))
Comment on lines -150 to +159
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function rosalind refactored with the following changes:


rosalind("i1.txt","o1.txt",middleEdge)
4 changes: 2 additions & 2 deletions bioinformatics/algorithms/needleman_wunsch.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ def trace_pointers_needleman(graph): #just goes for diagonal then insert then de
o2 = graph.s2[c-1]+o2
pointer = (r-1,c-1)
elif p>=2:
o1 = "-"+o1
o1 = f"-{o1}"
o2 = graph.s2[c-1]+o2
pointer = (r,c-1)
elif p==1:
o1 = graph.s1[r-1]+o1
o2 = "-"+o2
o2 = f"-{o2}"
Comment on lines -38 to +43
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function trace_pointers_needleman refactored with the following changes:

pointer = (r-1,c)
else:
print("ERROR")
Expand Down
6 changes: 4 additions & 2 deletions bioinformatics/algorithms/neighbour_joining.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import math


def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None): # DO NOT USE heaping distance matrix
def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None): # DO NOT USE heaping distance matrix
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function neighbour_joining refactored with the following changes:

"""Performs the neighbour joining algorithm on the given distance matrix. This is a recursive algorithm, so base and inductive cases need to be considered. It is also a greedy algorithm in which the closest pair of nodes in the augmented distance matrix is joined.

Args:
Expand All @@ -30,7 +30,9 @@ def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None): # DO

# INDUCTIVE CASE
# 1) Compute total_distance for each element in distance matrix
total_distance = {i: sum([d_mat.get(i, k) for k in d_mat.names]) for i in d_mat.names}
total_distance = {
i: sum(d_mat.get(i, k) for k in d_mat.names) for i in d_mat.names
}

# 2) Find nodes which are closest to one another in D*, which is the same matrix except D*i,j = (n-2)*Dij - TotalDistance(i) - TotalDistance(j). The matrix is not explicitly stored, but its entries are searched.
nexti, nextj, bestscore = None, None, math.inf
Expand Down
3 changes: 1 addition & 2 deletions bioinformatics/algorithms/phylogeny_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ def is_neighbour(self, i, k):

def shortest_path(self, i, k):
queue = [i]
visited = set()
visited.add(i)
visited = {i}
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function AdditivePhylogenyTree.shortest_path refactored with the following changes:

parents = {i: None}
while queue:
node = queue.pop(0)
Expand Down
3 changes: 1 addition & 2 deletions bioinformatics/algorithms/reconstruct_genome_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ def construct_k_universal_circular_string(k:int):
kmers = list(Sequence(["0","1"],k))
graph = kmers_to_debruijn(kmers)
genome_path = eulerian_cycle(graph)
const = reconstruct_from_genome_path(genome_path)[:-(k-1)]
return const
return reconstruct_from_genome_path(genome_path)[:-(k-1)]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function construct_k_universal_circular_string refactored with the following changes:


def reconstruct_from_paired_kmers(paired_kmers:list,k:int,d:int):
graph = paired_kmers_to_debruijn(paired_kmers)
Expand Down
2 changes: 1 addition & 1 deletion bioinformatics/algorithms/scoring_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
'B': {'A': 2, 'R': 1, 'N': 4, 'D': 5, 'C': -3, 'Q': 3, 'E': 4, 'G': 2, 'H': 3, 'I': -1, 'L': -2, 'K': 2, 'M': -1, 'F': -3, 'P': 1, 'S': 2, 'T': 2, 'W': -4, 'Y': -2, 'V': 0, 'B': 6, 'Z': 5},
'Z': {'A': 1, 'R': 2, 'N': 3, 'D': 4, 'C': -4, 'Q': 5, 'E': 5, 'G': 1, 'H': 3, 'I': -1, 'L': -1, 'K': 2, 'M': 0, 'F': -4, 'P': 1, 'S': 1, 'T': 1, 'W': -4, 'Y': -3, 'V': 0, 'B': 5, 'Z': 6}}

Blosum62 = lambda x,y:(blosum62_matrix[(x,y)] if (x,y) in blosum62_matrix else blosum62_matrix[(y,x)])
Blosum62 = lambda x,y: blosum62_matrix[(x,y) if (x,y) in blosum62_matrix else (y,x)]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 97-97 refactored with the following changes:

  • Simplify dictionary access using default get (default-get)


PAM250 = lambda x,y:PAM250_matrix[(x,y)]

Expand Down
15 changes: 8 additions & 7 deletions bioinformatics/algorithms/sequencing_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ def __init__(self, nodes:set,edges:dict):
self.edges = {i:edges[i] for i in edges if len(edges[i])!=0}

def __str__(self):
output = ""
for so in self.edges:
if len(self.edges[so])!=0:
output += str(so) + " -> " + ",".join(map(str,self.edges[so])) + "\n"
output = "".join(
f"{str(so)} -> " + ",".join(map(str, self.edges[so])) + "\n"
for so in self.edges
if len(self.edges[so]) != 0
)
Comment on lines -14 to +18
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Graph.__str__ refactored with the following changes:

return output.strip()

def __repr__(self):
Expand All @@ -29,11 +30,11 @@ def add_edge(self,a,b):
if a in self.edges:
self.edges[a].add(b)
else:
self.edges[a] = set([b])
self.edges[a] = {b}
Comment on lines -32 to +33
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Graph.add_edge refactored with the following changes:

class Overlap_Graph(Graph):
def __init__(self, patterns):
nodes = set(patterns)
edges = {i:set([j for j in patterns if prefix(j)==suffix(i)])for i in patterns}
edges = {i: {j for j in patterns if prefix(j)==suffix(i)} for i in patterns}
Comment on lines -36 to +37
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function Overlap_Graph.__init__ refactored with the following changes:

Graph.__init__(self,nodes,edges)

class DeBruijn_Graph(Graph):
Expand All @@ -44,7 +45,7 @@ def __init__(self, nodes,edges):

def text_to_debruijn(k,text):
raw_edges = [(text[i:i+k-1],text[i+1:i+k]) for i in range(len(text)-k+1)]
nodes = set([i for i,j in raw_edges])
nodes = {i for i,j in raw_edges}
Comment on lines -47 to +48
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function text_to_debruijn refactored with the following changes:

edges = {i:set() for i in nodes}
for i,j in raw_edges:
edges[i].add(j)
Expand Down
Loading