A-F-V · SourceryAI · Feb 17, 2024 · SourceryAI · Feb 17, 2024 · SourceryAI
diff --git a/bioinformatics/algorithms/alignment_graph.py b/bioinformatics/algorithms/alignment_graph.py
@@ -35,7 +35,5 @@ def score(self,r,c):
 
     def prnt(self):
         for r in range(len(self.s1)+1):
-            o = []
-            for c in range(len(self.s2)+1):
-                o.append(self.pos(r,c))
+            o = [self.pos(r,c) for c in range(len(self.s2)+1)]
             print(o)
diff --git a/bioinformatics/algorithms/binary_search.py b/bioinformatics/algorithms/binary_search.py
@@ -15,13 +15,13 @@ def search(self, first=True):
                 left = mid + 1
             elif comp_val > 0:
                 right = mid
-            else:
-                if first:
-                    if mid == self.start or self.comparer(self.emitter(mid-1)) != 0:
-                        return mid
-                    right = mid
+            elif first:
+                if mid == self.start or self.comparer(self.emitter(mid-1)) != 0:
+                    return mid
                 else:
-                    if mid == self.end or self.comparer(self.emitter(mid+1)) != 0:
-                        return mid
-                    left = mid
+                    right = mid
+            elif mid == self.end or self.comparer(self.emitter(mid+1)) != 0:
+                return mid
+            else:
+                left = mid
         return left if self.comparer(self.emitter(left)) == 0 else None
diff --git a/bioinformatics/algorithms/binary_tree.py b/bioinformatics/algorithms/binary_tree.py
@@ -9,7 +9,7 @@ def __init__(self,nodes,edges,root=None,val=0):
         self.nodes = nodes #a dict of node name to val pairs
         self.edges = edges # a dict of node name to a list of neighbours (child name, edgeval)
         self.root = root #the name of the root
-        if self.root ==None:
+        if self.root is None:
             self.root = list(set(self.edges.keys()) - set(self.nodes.keys()))[0]
             self.nodes[self.root] = val
 
@@ -20,7 +20,7 @@ def leaves(self,node="ROOT"):
 
         if node == "ROOT":
             return self.leaves(self.root)
-        if node==None:
+        if node is None:
             return []
         if self.is_leaf(node):
             return [node]
@@ -51,10 +51,9 @@ def edgecost(self,node="ROOT"):
             return self.edgecost(self.root)
         if self.is_leaf(node):
             return 0
-        else:
-            ln,lw = self.left(node)
-            rn,rw = self.right(node)
-            return lw+rw+self.edgecost(ln)+self.edgecost(rn)
+        ln,lw = self.left(node)
+        rn,rw = self.right(node)
+        return lw+rw+self.edgecost(ln)+self.edgecost(rn)
 
     def copy(self):
         return BinaryTree(copy.deepcopy(self.nodes),copy.deepcopy(self.edges),self.root,self.nodes[self.root])
@@ -74,10 +73,10 @@ def merge(self,other,fv,fe): # Can be improved a lot
                 weight1 = edge1[1]
                 for edge2 in other.edges[f]:
                     t2 = edge2[0]
-                    weight2= edge2[1]
                     if t1==t2:
+                        weight2= edge2[1]
                         final_edges[f].append((t1,fe(weight1,weight2)))
-                        
+
         self.edges = final_edges
         return self
 

diff --git a/bioinformatics/algorithms/burrows_wheeler.py b/bioinformatics/algorithms/burrows_wheeler.py
@@ -8,22 +8,11 @@ def bwt(text, s_array=None):
     :param s_array: suffix array
     :return: BWT of text
     """
-    # bwt = ""
-    # for i in s_array:
-    #    bwt += text[i]
-    # return bwt
-    if s_array is None:
-        c_rot = [text[i:] + text[:i] for i in range(len(text))]
-        c_rot = sorted(c_rot)
-        return "".join(map(lambda x: x[-1], c_rot))
-    else:
-        output = ""
-        for s_i in s_array:
-            if s_i == 0:
-                output += text[-1]
-            else:
-                output += text[s_i-1]
-        return output
+    if s_array is not None:
+        return "".join(text[-1] if s_i == 0 else text[s_i-1] for s_i in s_array)
+    c_rot = [text[i:] + text[:i] for i in range(len(text))]
+    c_rot = sorted(c_rot)
+    return "".join(map(lambda x: x[-1], c_rot))
 
 
 def number_letters(text):
@@ -104,7 +93,7 @@ def bwt_matching(first_col, last_col, pattern, ltf_mapping):  # O(n^2) but can b
 def bwt_matching_all(bwt, patterns):
     f_col, l_col = number_letters(first_col_from_bwt(bwt)), number_letters(bwt)
     ltf_mapping = last_to_first_mapping(bwt)
-    matches = []
-    for pattern in patterns:
-        matches.append(bwt_matching(f_col, l_col, pattern, ltf_mapping))
-    return matches
+    return [
+        bwt_matching(f_col, l_col, pattern, ltf_mapping)
+        for pattern in patterns
+    ]
diff --git a/bioinformatics/algorithms/clustering.py b/bioinformatics/algorithms/clustering.py
@@ -3,15 +3,17 @@
 
 
 def distance(x, y):
-    return sum([(a - b)**2 for a, b in zip(x, y)])**0.5
+    return sum((a - b)**2 for a, b in zip(x, y))**0.5
 
 
 def dist_to_cluster(point, centres):  # how close is the point to the cluster it is assigned to (i.e. the nearest centre)?
-    return min([distance(point, centre) for centre in centres])
+    return min(distance(point, centre) for centre in centres)
 
 
 def distortion(points, centres):
-    return sum([dist_to_cluster(point, centres)**2 for point in points])/len(points)
+    return sum(dist_to_cluster(point, centres) ** 2 for point in points) / len(
+        points
+    )
 
 
 def farthest_first_clustering(points, k):
@@ -34,7 +36,7 @@ def assign_to_cluster(point, centres):
 
 
 def add_vector(a, b):
-    if b == None:
+    if b is None:
         print("STOP")
     return tuple(map(lambda x: x[0]+x[1], zip(a, b)))
 
@@ -72,9 +74,7 @@ def lloyd_kmeans(points, k, initializer=False):
 
 
 def get_responsibilities(point, centres, beta):
-    h_vals = []
-    for centre in centres:
-        h_vals.append(exp(-beta*distance(point, centre)))
+    h_vals = [exp(-beta*distance(point, centre)) for centre in centres]
     return [h_val/sum(h_vals) for h_val in h_vals]
 
 

diff --git a/bioinformatics/algorithms/eulerian_cycle.py b/bioinformatics/algorithms/eulerian_cycle.py
@@ -10,7 +10,7 @@ def random_cycle(start, graph: Graph):
     edges = graph.edges
     cycle = [start]
     current = start
-    while(current != start or len(cycle) == 1):
+    while current != start or len(cycle) == 1:  # loop until the walk is back at start; the len(cycle) == 1 clause admits the first step, when current is still start
         nxt = edges[current].pop()
         if len(edges[current]) == 0:
             del edges[current]
@@ -44,14 +44,12 @@ def aux(subset: set, ends):
         if (subset, ends) in memo:
             return memo[(subset, ends)]
         if len(subset) == 1:
-            if ends == subset.pop():
-                return [ends]
-            return None
+            return [ends] if ends == subset.pop() else None
         for neighbour in graph.edges[ends]:
             if neighbour not in subset:
                 continue
             subpath = aux(set.difference(subset, [ends]), neighbour)
-            if subpath == None:
+            if subpath is None:
                 continue
             res = subpath+[ends]
             memo[(subset, ends)] = res

diff --git a/bioinformatics/algorithms/four_russians_binary_encoding.py b/bioinformatics/algorithms/four_russians_binary_encoding.py
@@ -20,12 +20,12 @@ def fill_graph_needleman(graph,indel,offset=0,left=None,top=None):
             if r==c==0:
                 graph.set(r,c,offset)
             elif r==0:
-                if top==None:
+                if top is None:
                     graph.set(r,c,graph.pos(r,c-1)+indel,2)
                 else:
                     graph.set(r,c,top[c-1])
             elif c==0:
-                if left==None:
+                if left is None:
                     graph.set(r,c,graph.pos(r-1,c)+indel,1)
                 else:
                     graph.set(r,c,left[r-1])
@@ -45,14 +45,11 @@ def __init__(self,chars,t,empty=False):
         self.empty= empty
 
     def random(self):
-        output = ""
-        for i in range(self.t):
-            output += random.choice(self.chars)
-        return output
+        return "".join(random.choice(self.chars) for _ in range(self.t))
     def objWithNum(self,i):
         output = ""
         v = i
-        for ind in range(self.t):
+        for _ in range(self.t):
             output = str(self.chars[v%len(self.chars)]) +output
             v //= len(self.chars)
         return output
@@ -87,7 +84,7 @@ def __next__(self):
 def accumulate_diff(diffs):
     o = []
     for i in diffs:
-        if len(o) > 0:
+        if o:
             o.append(i+o[-1])
         else:
             o.append(i)

diff --git a/bioinformatics/algorithms/hmm.py b/bioinformatics/algorithms/hmm.py
@@ -68,9 +68,7 @@ def backward(self):
     def __getitem__(self, key):
         if key == "START":
             return self.graph[0, "START"]
-        if key == "END":
-            return self.graph[len(self.x)+1, "END"]
-        return self.graph[key]
+        return self.graph[len(self.x)+1, "END"] if key == "END" else self.graph[key]
 
 
 class HMM:

diff --git a/bioinformatics/algorithms/middle_node.py b/bioinformatics/algorithms/middle_node.py
@@ -97,17 +97,21 @@
 'Z': {'A':1, 'R':2, 'N':3, 'D':4, 'C':-4, 'Q':5, 'E':5, 'G':1, 'H':3, 'I':-1, 'L':-1, 'K':2, 'M':0, 'F':-4,'P': 1, 'S':1, 'T':1,'W': -4, 'Y':-3, 'V':0, 'B':5, 'Z':6}}
 
 def blosum62score(a,b):
-    if (a,b) in blosum62:
-        return blosum62[(a,b)]
-    return blosum62[(b,a)]
+    return blosum62[(a,b)] if (a,b) in blosum62 else blosum62[(b,a)]
 
 def prefix(s1,s2,col):
     height = len(s1)
     c1 = [i*indel for i in range(0,height+1)]
-    for i in range(1,col+1):
+    for _ in range(1,col+1):
         c2 = [c1[0]+indel]
-        for row in range(1,height+1):
-            c2.append(max(c2[row-1]+indel,c1[row]+indel,c1[row-1]+blosum62score(s1[row-1],s2[col-1])))
+        # Explicit loop: each cell reads c2[row - 1], which is only present once
+        # the previous iteration has appended it, so build the column step by step.
+        for row in range(1, height + 1):
+            c2.append(max(
+                c2[row - 1] + indel,
+                c1[row] + indel,
+                c1[row - 1] + blosum62score(s1[row - 1], s2[col - 1]),
+            ))
         c1 = c2
     return c1
 
@@ -147,13 +151,11 @@ def middleEdge(s1,s2):
 print(middleNode(s1,s2))
 
 def rosalind(filei,filej,func):
-    f = open(filei,"r")
-    w1 = f.readline().rstrip('\n')
-    w2 = f.readline().rstrip('\n')
-    f.close()
+    with open(filei,"r") as f:
+        w1 = f.readline().rstrip('\n')
+        w2 = f.readline().rstrip('\n')
     o  = func(w1,w2)
-    f = open(filej,"w")
-    f.writelines(str(o))
-    f.close()
+    with open(filej,"w") as f:
+        f.writelines(str(o))
 
 rosalind("i1.txt","o1.txt",middleEdge)
diff --git a/bioinformatics/algorithms/needleman_wunsch.py b/bioinformatics/algorithms/needleman_wunsch.py
@@ -35,12 +35,12 @@ def trace_pointers_needleman(graph): #just goes for diagonal then insert then de
             o2 = graph.s2[c-1]+o2
             pointer = (r-1,c-1)
         elif p>=2:
-            o1 = "-"+o1
+            o1 = f"-{o1}"
             o2 = graph.s2[c-1]+o2
             pointer = (r,c-1)
         elif p==1:
             o1 = graph.s1[r-1]+o1
-            o2 = "-"+o2
+            o2 = f"-{o2}"
             pointer = (r-1,c)
         else:
             print("ERROR")

diff --git a/bioinformatics/algorithms/neighbour_joining.py b/bioinformatics/algorithms/neighbour_joining.py
@@ -5,7 +5,7 @@
 import math
 
 
-def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None):  # DO NOT USE heaping distance matrix
+def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None):    # DO NOT USE heaping distance matrix
     """Performs the neighbour joining algorithm on the given distance matrix.  This is a recurisve algorithm, so base and inductive cases need to be considered. It is also a greedy algorithm in which the closest pair of nodes in the augmented distance matrix are joined.
 
     Args:
@@ -30,7 +30,9 @@ def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None):  # DO
 
     # INDUCTIVE CASE
     # 1) Compute total_distance for each element in distance matrix
-    total_distance = {i: sum([d_mat.get(i, k) for k in d_mat.names]) for i in d_mat.names}
+    total_distance = {
+        i: sum(d_mat.get(i, k) for k in d_mat.names) for i in d_mat.names
+    }
 
     # 2) Find nodes which are closest to one another in D*, which is the same matrix except D*i,j = (n-2)*Dij - TotalDistance(i) - TotalDistance(j). The matrix is not explicitly stored, but its entries are searched.
     nexti, nextj, bestscore = None, None, math.inf

diff --git a/bioinformatics/algorithms/phylogeny_tree.py b/bioinformatics/algorithms/phylogeny_tree.py
@@ -37,8 +37,7 @@ def is_neighbour(self, i, k):
 
     def shortest_path(self, i, k):
         queue = [i]
-        visited = set()
-        visited.add(i)
+        visited = {i}
         parents = {i: None}
         while queue:
             node = queue.pop(0)

diff --git a/bioinformatics/algorithms/reconstruct_genome_string.py b/bioinformatics/algorithms/reconstruct_genome_string.py
@@ -23,8 +23,7 @@ def construct_k_universal_circular_string(k:int):
     kmers = list(Sequence(["0","1"],k))
     graph = kmers_to_debruijn(kmers)
     genome_path = eulerian_cycle(graph)
-    const = reconstruct_from_genome_path(genome_path)[:-(k-1)]
-    return const
+    return reconstruct_from_genome_path(genome_path)[:-(k-1)]
 
 def reconstruct_from_paired_kmers(paired_kmers:list,k:int,d:int):
     graph = paired_kmers_to_debruijn(paired_kmers)

diff --git a/bioinformatics/algorithms/scoring_functions.py b/bioinformatics/algorithms/scoring_functions.py
@@ -94,7 +94,7 @@
           'B': {'A': 2, 'R': 1, 'N': 4, 'D': 5, 'C': -3, 'Q': 3, 'E': 4, 'G': 2, 'H': 3, 'I': -1, 'L': -2, 'K': 2, 'M': -1, 'F': -3, 'P': 1, 'S': 2, 'T': 2, 'W': -4, 'Y': -2, 'V': 0, 'B': 6, 'Z': 5},
           'Z': {'A': 1, 'R': 2, 'N': 3, 'D': 4, 'C': -4, 'Q': 5, 'E': 5, 'G': 1, 'H': 3, 'I': -1, 'L': -1, 'K': 2, 'M': 0, 'F': -4, 'P': 1, 'S': 1, 'T': 1, 'W': -4, 'Y': -3, 'V': 0, 'B': 5, 'Z': 6}}
 
-Blosum62 = lambda x,y:(blosum62_matrix[(x,y)] if (x,y) in blosum62_matrix else blosum62_matrix[(y,x)])
+Blosum62 = lambda x,y: blosum62_matrix[(x,y)] if (x,y) in blosum62_matrix else blosum62_matrix[(y,x)]  # lazy fallback: (y,x) is only looked up when (x,y) is absent
 
 PAM250 = lambda x,y:PAM250_matrix[(x,y)]
 

diff --git a/bioinformatics/algorithms/sequencing_graph.py b/bioinformatics/algorithms/sequencing_graph.py
@@ -11,10 +11,11 @@ def __init__(self, nodes:set,edges:dict):
         self.edges = {i:edges[i] for i in edges if len(edges[i])!=0}
 
     def __str__(self):
-        output = ""
-        for so in self.edges:
-            if len(self.edges[so])!=0:
-                output += str(so) + " -> " + ",".join(map(str,self.edges[so])) + "\n"
+        output = "".join(
+            f"{str(so)} -> " + ",".join(map(str, self.edges[so])) + "\n"
+            for so in self.edges
+            if len(self.edges[so]) != 0
+        )
         return output.strip()
 
     def __repr__(self):
@@ -29,11 +30,11 @@ def add_edge(self,a,b):
         if a in self.edges:
             self.edges[a].add(b)
         else:
-            self.edges[a] = set([b])
+            self.edges[a] = {b}
 class Overlap_Graph(Graph):
     def __init__(self, patterns):
         nodes = set(patterns)
-        edges = {i:set([j for j in patterns if prefix(j)==suffix(i)])for i in patterns}
+        edges = {i: {j for j in patterns if prefix(j)==suffix(i)} for i in patterns}
         Graph.__init__(self,nodes,edges)
 
 class DeBruijn_Graph(Graph):
@@ -44,7 +45,7 @@ def __init__(self, nodes,edges):
 
 def text_to_debruijn(k,text):
     raw_edges = [(text[i:i+k-1],text[i+1:i+k]) for i in range(len(text)-k+1)]
-    nodes = set([i for i,j in raw_edges])
+    nodes = {i for i,j in raw_edges}
     edges = {i:set() for i in nodes}
     for i,j in raw_edges:
         edges[i].add(j)