diff --git a/bioinformatics/algorithms/alignment_graph.py b/bioinformatics/algorithms/alignment_graph.py index 23745b7..ce4b122 100644 --- a/bioinformatics/algorithms/alignment_graph.py +++ b/bioinformatics/algorithms/alignment_graph.py @@ -35,7 +35,5 @@ def score(self,r,c): def prnt(self): for r in range(len(self.s1)+1): - o = [] - for c in range(len(self.s2)+1): - o.append(self.pos(r,c)) + o = [self.pos(r,c) for c in range(len(self.s2)+1)] print(o) \ No newline at end of file diff --git a/bioinformatics/algorithms/binary_search.py b/bioinformatics/algorithms/binary_search.py index 44b75ed..6b74bc7 100644 --- a/bioinformatics/algorithms/binary_search.py +++ b/bioinformatics/algorithms/binary_search.py @@ -15,13 +15,13 @@ def search(self, first=True): left = mid + 1 elif comp_val > 0: right = mid - else: - if first: - if mid == self.start or self.comparer(self.emitter(mid-1)) != 0: - return mid - right = mid + elif first: + if mid == self.start or self.comparer(self.emitter(mid-1)) != 0: + return mid else: - if mid == self.end or self.comparer(self.emitter(mid+1)) != 0: - return mid - left = mid + right = mid + elif mid == self.end or self.comparer(self.emitter(mid+1)) != 0: + return mid + else: + left = mid return left if self.comparer(self.emitter(left)) == 0 else None diff --git a/bioinformatics/algorithms/binary_tree.py b/bioinformatics/algorithms/binary_tree.py index e0d9266..0bb3e8d 100644 --- a/bioinformatics/algorithms/binary_tree.py +++ b/bioinformatics/algorithms/binary_tree.py @@ -9,7 +9,7 @@ def __init__(self,nodes,edges,root=None,val=0): self.nodes = nodes #a dict of node name to val pairs self.edges = edges # a dict of node name to a list of neighbours (child name, edgeval) self.root = root #the name of the root - if self.root ==None: + if self.root is None: self.root = list(set(self.edges.keys()) - set(self.nodes.keys()))[0] self.nodes[self.root] = val @@ -20,7 +20,7 @@ def leaves(self,node="ROOT"): if node == "ROOT": return self.leaves(self.root) - 
if node==None: + if node is None: return [] if self.is_leaf(node): return [node] @@ -51,10 +51,9 @@ def edgecost(self,node="ROOT"): return self.edgecost(self.root) if self.is_leaf(node): return 0 - else: - ln,lw = self.left(node) - rn,rw = self.right(node) - return lw+rw+self.edgecost(ln)+self.edgecost(rn) + ln,lw = self.left(node) + rn,rw = self.right(node) + return lw+rw+self.edgecost(ln)+self.edgecost(rn) def copy(self): return BinaryTree(copy.deepcopy(self.nodes),copy.deepcopy(self.edges),self.root,self.nodes[self.root]) @@ -74,10 +73,10 @@ def merge(self,other,fv,fe): # Can be improved a lot weight1 = edge1[1] for edge2 in other.edges[f]: t2 = edge2[0] - weight2= edge2[1] if t1==t2: + weight2= edge2[1] final_edges[f].append((t1,fe(weight1,weight2))) - + self.edges = final_edges return self diff --git a/bioinformatics/algorithms/burrows_wheeler.py b/bioinformatics/algorithms/burrows_wheeler.py index b41aefa..4d2981b 100644 --- a/bioinformatics/algorithms/burrows_wheeler.py +++ b/bioinformatics/algorithms/burrows_wheeler.py @@ -8,22 +8,11 @@ def bwt(text, s_array=None): :param s_array: suffix array :return: BWT of text """ - # bwt = "" - # for i in s_array: - # bwt += text[i] - # return bwt - if s_array is None: - c_rot = [text[i:] + text[:i] for i in range(len(text))] - c_rot = sorted(c_rot) - return "".join(map(lambda x: x[-1], c_rot)) - else: - output = "" - for s_i in s_array: - if s_i == 0: - output += text[-1] - else: - output += text[s_i-1] - return output + if s_array is not None: + return "".join(text[-1] if s_i == 0 else text[s_i-1] for s_i in s_array) + c_rot = [text[i:] + text[:i] for i in range(len(text))] + c_rot = sorted(c_rot) + return "".join(map(lambda x: x[-1], c_rot)) def number_letters(text): @@ -104,7 +93,7 @@ def bwt_matching(first_col, last_col, pattern, ltf_mapping): # O(n^2) but can b def bwt_matching_all(bwt, patterns): f_col, l_col = number_letters(first_col_from_bwt(bwt)), number_letters(bwt) ltf_mapping = last_to_first_mapping(bwt) 
- matches = [] - for pattern in patterns: - matches.append(bwt_matching(f_col, l_col, pattern, ltf_mapping)) - return matches + return [ + bwt_matching(f_col, l_col, pattern, ltf_mapping) + for pattern in patterns + ] diff --git a/bioinformatics/algorithms/clustering.py b/bioinformatics/algorithms/clustering.py index 58059c9..6537b11 100644 --- a/bioinformatics/algorithms/clustering.py +++ b/bioinformatics/algorithms/clustering.py @@ -3,15 +3,17 @@ def distance(x, y): - return sum([(a - b)**2 for a, b in zip(x, y)])**0.5 + return sum((a - b)**2 for a, b in zip(x, y))**0.5 def dist_to_cluster(point, centres): # how close is the point to the cluster it is assigned to (i.e. the nearest centre)? - return min([distance(point, centre) for centre in centres]) + return min(distance(point, centre) for centre in centres) def distortion(points, centres): - return sum([dist_to_cluster(point, centres)**2 for point in points])/len(points) + return sum(dist_to_cluster(point, centres) ** 2 for point in points) / len( + points + ) def farthest_first_clustering(points, k): @@ -34,7 +36,7 @@ def assign_to_cluster(point, centres): def add_vector(a, b): - if b == None: + if b is None: print("STOP") return tuple(map(lambda x: x[0]+x[1], zip(a, b))) @@ -72,9 +74,7 @@ def lloyd_kmeans(points, k, initializer=False): def get_responsibilities(point, centres, beta): - h_vals = [] - for centre in centres: - h_vals.append(exp(-beta*distance(point, centre))) + h_vals = [exp(-beta*distance(point, centre)) for centre in centres] return [h_val/sum(h_vals) for h_val in h_vals] diff --git a/bioinformatics/algorithms/eulerian_cycle.py b/bioinformatics/algorithms/eulerian_cycle.py index 5604393..ddb896c 100644 --- a/bioinformatics/algorithms/eulerian_cycle.py +++ b/bioinformatics/algorithms/eulerian_cycle.py @@ -10,7 +10,7 @@ def random_cycle(start, graph: Graph): edges = graph.edges cycle = [start] current = start - while(current != start or len(cycle) == 1): + while current != start or len(cycle) 
== 1: nxt = edges[current].pop() if len(edges[current]) == 0: del edges[current] @@ -44,14 +44,12 @@ def aux(subset: set, ends): if (subset, ends) in memo: return memo[(subset, ends)] if len(subset) == 1: - if ends == subset.pop(): - return [ends] - return None + return [ends] if ends == subset.pop() else None for neighbour in graph.edges[ends]: if neighbour not in subset: continue subpath = aux(set.difference(subset, [ends]), neighbour) - if subpath == None: + if subpath is None: continue res = subpath+[ends] memo[(subset, ends)] = res diff --git a/bioinformatics/algorithms/four_russians_binary_encoding.py b/bioinformatics/algorithms/four_russians_binary_encoding.py index d638af7..75d65db 100644 --- a/bioinformatics/algorithms/four_russians_binary_encoding.py +++ b/bioinformatics/algorithms/four_russians_binary_encoding.py @@ -20,12 +20,12 @@ def fill_graph_needleman(graph,indel,offset=0,left=None,top=None): if r==c==0: graph.set(r,c,offset) elif r==0: - if top==None: + if top is None: graph.set(r,c,graph.pos(r,c-1)+indel,2) else: graph.set(r,c,top[c-1]) elif c==0: - if left==None: + if left is None: graph.set(r,c,graph.pos(r-1,c)+indel,1) else: graph.set(r,c,left[r-1]) @@ -45,14 +45,11 @@ def __init__(self,chars,t,empty=False): self.empty= empty def random(self): - output = "" - for i in range(self.t): - output += random.choice(self.chars) - return output + return "".join(random.choice(self.chars) for _ in range(self.t)) def objWithNum(self,i): output = "" v = i - for ind in range(self.t): + for _ in range(self.t): output = str(self.chars[v%len(self.chars)]) +output v //= len(self.chars) return output @@ -87,7 +84,7 @@ def __next__(self): def accumulate_diff(diffs): o = [] for i in diffs: - if len(o) > 0: + if o: o.append(i+o[-1]) else: o.append(i) diff --git a/bioinformatics/algorithms/hmm.py b/bioinformatics/algorithms/hmm.py index 02c8d5b..a2d5c15 100644 --- a/bioinformatics/algorithms/hmm.py +++ b/bioinformatics/algorithms/hmm.py @@ -68,9 +68,7 @@ def 
backward(self): def __getitem__(self, key): if key == "START": return self.graph[0, "START"] - if key == "END": - return self.graph[len(self.x)+1, "END"] - return self.graph[key] + return self.graph[len(self.x)+1, "END"] if key == "END" else self.graph[key] class HMM: diff --git a/bioinformatics/algorithms/middle_node.py b/bioinformatics/algorithms/middle_node.py index 29c1080..ecb640d 100644 --- a/bioinformatics/algorithms/middle_node.py +++ b/bioinformatics/algorithms/middle_node.py @@ -97,17 +97,21 @@ 'Z': {'A':1, 'R':2, 'N':3, 'D':4, 'C':-4, 'Q':5, 'E':5, 'G':1, 'H':3, 'I':-1, 'L':-1, 'K':2, 'M':0, 'F':-4,'P': 1, 'S':1, 'T':1,'W': -4, 'Y':-3, 'V':0, 'B':5, 'Z':6}} def blosum62score(a,b): - if (a,b) in blosum62: - return blosum62[(a,b)] - return blosum62[(b,a)] + return blosum62[(a,b)] if (a,b) in blosum62 else blosum62[(b,a)] def prefix(s1,s2,col): height = len(s1) c1 = [i*indel for i in range(0,height+1)] - for i in range(1,col+1): + for _ in range(1,col+1): c2 = [c1[0]+indel] - for row in range(1,height+1): - c2.append(max(c2[row-1]+indel,c1[row]+indel,c1[row-1]+blosum62score(s1[row-1],s2[col-1]))) + c2.extend( + max( + c2[row - 1] + indel, + c1[row] + indel, + c1[row - 1] + blosum62score(s1[row - 1], s2[col - 1]), + ) + for row in range(1, height + 1) + ) c1 = c2 return c1 @@ -147,13 +151,11 @@ def middleEdge(s1,s2): print(middleNode(s1,s2)) def rosalind(filei,filej,func): - f = open(filei,"r") - w1 = f.readline().rstrip('\n') - w2 = f.readline().rstrip('\n') - f.close() + with open(filei,"r") as f: + w1 = f.readline().rstrip('\n') + w2 = f.readline().rstrip('\n') o = func(w1,w2) - f = open(filej,"w") - f.writelines(str(o)) - f.close() + with open(filej,"w") as f: + f.writelines(str(o)) rosalind("i1.txt","o1.txt",middleEdge) \ No newline at end of file diff --git a/bioinformatics/algorithms/needleman_wunsch.py b/bioinformatics/algorithms/needleman_wunsch.py index 7bce528..47ad902 100644 --- a/bioinformatics/algorithms/needleman_wunsch.py +++ 
b/bioinformatics/algorithms/needleman_wunsch.py @@ -35,12 +35,12 @@ def trace_pointers_needleman(graph): #just goes for diagonal then insert then de o2 = graph.s2[c-1]+o2 pointer = (r-1,c-1) elif p>=2: - o1 = "-"+o1 + o1 = f"-{o1}" o2 = graph.s2[c-1]+o2 pointer = (r,c-1) elif p==1: o1 = graph.s1[r-1]+o1 - o2 = "-"+o2 + o2 = f"-{o2}" pointer = (r-1,c) else: print("ERROR") diff --git a/bioinformatics/algorithms/neighbour_joining.py b/bioinformatics/algorithms/neighbour_joining.py index fa49334..0cf1370 100644 --- a/bioinformatics/algorithms/neighbour_joining.py +++ b/bioinformatics/algorithms/neighbour_joining.py @@ -5,7 +5,7 @@ import math -def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None): # DO NOT USE heaping distance matrix +def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None): # DO NOT USE heaping distance matrix """Performs the neighbour joining algorithm on the given distance matrix. This is a recurisve algorithm, so base and inductive cases need to be considered. It is also a greedy algorithm in which the closest pair of nodes in the augmented distance matrix are joined. Args: @@ -30,7 +30,9 @@ def neighbour_joining(d_mat: DistanceMatrix, inner_node_next_label=None): # DO # INDUCTIVE CASE # 1) Compute total_distance for each element in distance matrix - total_distance = {i: sum([d_mat.get(i, k) for k in d_mat.names]) for i in d_mat.names} + total_distance = { + i: sum(d_mat.get(i, k) for k in d_mat.names) for i in d_mat.names + } # 2) Find nodes which are closest to one another in D*, which is the same matrix except D*i,j = (n-2)*Dij - TotalDistance(i) - TotalDistance(j). The matrix is not explicitly stored, but its entries are searched. 
nexti, nextj, bestscore = None, None, math.inf diff --git a/bioinformatics/algorithms/phylogeny_tree.py b/bioinformatics/algorithms/phylogeny_tree.py index 3c20671..4be654f 100644 --- a/bioinformatics/algorithms/phylogeny_tree.py +++ b/bioinformatics/algorithms/phylogeny_tree.py @@ -37,8 +37,7 @@ def is_neighbour(self, i, k): def shortest_path(self, i, k): queue = [i] - visited = set() - visited.add(i) + visited = {i} parents = {i: None} while queue: node = queue.pop(0) diff --git a/bioinformatics/algorithms/reconstruct_genome_string.py b/bioinformatics/algorithms/reconstruct_genome_string.py index e9f4232..71f883d 100644 --- a/bioinformatics/algorithms/reconstruct_genome_string.py +++ b/bioinformatics/algorithms/reconstruct_genome_string.py @@ -23,8 +23,7 @@ def construct_k_universal_circular_string(k:int): kmers = list(Sequence(["0","1"],k)) graph = kmers_to_debruijn(kmers) genome_path = eulerian_cycle(graph) - const = reconstruct_from_genome_path(genome_path)[:-(k-1)] - return const + return reconstruct_from_genome_path(genome_path)[:-(k-1)] def reconstruct_from_paired_kmers(paired_kmers:list,k:int,d:int): graph = paired_kmers_to_debruijn(paired_kmers) diff --git a/bioinformatics/algorithms/scoring_functions.py b/bioinformatics/algorithms/scoring_functions.py index de0ec73..e55b23c 100644 --- a/bioinformatics/algorithms/scoring_functions.py +++ b/bioinformatics/algorithms/scoring_functions.py @@ -94,7 +94,7 @@ 'B': {'A': 2, 'R': 1, 'N': 4, 'D': 5, 'C': -3, 'Q': 3, 'E': 4, 'G': 2, 'H': 3, 'I': -1, 'L': -2, 'K': 2, 'M': -1, 'F': -3, 'P': 1, 'S': 2, 'T': 2, 'W': -4, 'Y': -2, 'V': 0, 'B': 6, 'Z': 5}, 'Z': {'A': 1, 'R': 2, 'N': 3, 'D': 4, 'C': -4, 'Q': 5, 'E': 5, 'G': 1, 'H': 3, 'I': -1, 'L': -1, 'K': 2, 'M': 0, 'F': -4, 'P': 1, 'S': 1, 'T': 1, 'W': -4, 'Y': -3, 'V': 0, 'B': 5, 'Z': 6}} -Blosum62 = lambda x,y:(blosum62_matrix[(x,y)] if (x,y) in blosum62_matrix else blosum62_matrix[(y,x)]) +Blosum62 = lambda x,y: blosum62_matrix[(x,y)] if (x,y) in blosum62_matrix else blosum62_matrix[(y,x)] 
PAM250 = lambda x,y:PAM250_matrix[(x,y)] diff --git a/bioinformatics/algorithms/sequencing_graph.py b/bioinformatics/algorithms/sequencing_graph.py index 4d19aa3..fb97bb1 100644 --- a/bioinformatics/algorithms/sequencing_graph.py +++ b/bioinformatics/algorithms/sequencing_graph.py @@ -11,10 +11,11 @@ def __init__(self, nodes:set,edges:dict): self.edges = {i:edges[i] for i in edges if len(edges[i])!=0} def __str__(self): - output = "" - for so in self.edges: - if len(self.edges[so])!=0: - output += str(so) + " -> " + ",".join(map(str,self.edges[so])) + "\n" + output = "".join( + f"{str(so)} -> " + ",".join(map(str, self.edges[so])) + "\n" + for so in self.edges + if len(self.edges[so]) != 0 + ) return output.strip() def __repr__(self): @@ -29,11 +30,11 @@ def add_edge(self,a,b): if a in self.edges: self.edges[a].add(b) else: - self.edges[a] = set([b]) + self.edges[a] = {b} class Overlap_Graph(Graph): def __init__(self, patterns): nodes = set(patterns) - edges = {i:set([j for j in patterns if prefix(j)==suffix(i)])for i in patterns} + edges = {i: {j for j in patterns if prefix(j)==suffix(i)} for i in patterns} Graph.__init__(self,nodes,edges) class DeBruijn_Graph(Graph): @@ -44,7 +45,7 @@ def __init__(self, nodes,edges): def text_to_debruijn(k,text): raw_edges = [(text[i:i+k-1],text[i+1:i+k]) for i in range(len(text)-k+1)] - nodes = set([i for i,j in raw_edges]) + nodes = {i for i,j in raw_edges} edges = {i:set() for i in nodes} for i,j in raw_edges: edges[i].add(j) diff --git a/bioinformatics/algorithms/small_parsimony.py b/bioinformatics/algorithms/small_parsimony.py index 0eec23a..a636a82 100644 --- a/bioinformatics/algorithms/small_parsimony.py +++ b/bioinformatics/algorithms/small_parsimony.py @@ -27,7 +27,6 @@ def dp_tree_char_scores(self,k,v): #the DP to build up tree return self.s[v][k] if self.is_leaf(v): self.s[v] = {c:(inf if c!=self.leaves[v] else 0) for c in self.charSet} - return self.s[v][k] else: best_dscore,best_sscore,best_dkv,best_skv = 
inf,inf,None,None for i in self.charSet: @@ -44,23 +43,23 @@ def dp_tree_char_scores(self,k,v): #the DP to build up tree self.s[v] = {} self.s[v][k] = best_dscore+best_sscore self.optimal_tv[(v,k)] = (best_dkv,best_skv) - return self.s[v][k] + + return self.s[v][k] def best_k(self,node): if self.is_leaf(node): return self.leaves[node] - else: - bestscore = inf - bestk = None - for k in self.charSet: - score = self.dp_tree_char_scores(k,node) - if score 0: + while stack: path = stack.pop() char = path[-1][1] last_node = path[-1][0] @@ -183,13 +182,13 @@ def get_longest_repeat_string(self): def long_path(node, pre_text): if self.node_is_leaf(node): return "" - else: - best = pre_text - for next_node, text in self.edges[node]: - attempt = long_path(next_node, pre_text + text) - if len(attempt) > len(best): - best = attempt - return best + best = pre_text + for next_node, text in self.edges[node]: + attempt = long_path(next_node, pre_text + text) + if len(attempt) > len(best): + best = attempt + return best + return long_path(self.root, "") def longest_substring_match(self, text): @@ -243,7 +242,7 @@ def match(reference_text, testing_text): def rec(node, text, append_text=""): if self.node_is_leaf(node): - return append_text+"(M)" + return f"{append_text}(M)" for next_node, ref_text in self.edges[node]: matched_text, text_left = match(ref_text, text) if len(matched_text) == 0: @@ -255,11 +254,12 @@ def rec(node, text, append_text=""): return append_text+matched_text+text[len(matched_text)] return rec(next_node, text_left, append_text+matched_text) return append_text+text[0] + return rec(self.root, text) def create_trie(words): - nodes = set([0]) + nodes = {0} root = 0 edges = {} nextNode = 1 @@ -284,11 +284,7 @@ def match_text_to_patterns(text, patterns): Returns all indices where the text matches a pattern """ trie = create_trie(patterns) - matches = [] - for i in range(len(text)): - if len(trie.prefix_match(text[i:])) != 0: - matches.append(i) - return matches + 
return [i for i in range(len(text)) if len(trie.prefix_match(text[i:])) != 0] def create_suffix_trie(text, has_dollar=True): diff --git a/bioinformatics/algorithms/waterman_smith.py b/bioinformatics/algorithms/waterman_smith.py index 94fdf4a..cb969fe 100644 --- a/bioinformatics/algorithms/waterman_smith.py +++ b/bioinformatics/algorithms/waterman_smith.py @@ -14,9 +14,7 @@ def fill_graph_smith(graph,indel): for r in range(graph.height()): if r==c==0: graph.set(r,c,0) - elif r==0: - graph.set(r,c,0,0) - elif c==0: + elif r == 0 or c == 0: graph.set(r,c,0,0) else: graph.update(r,c,graph.pos(r-1,c)+indel,1) @@ -43,12 +41,12 @@ def trace_pointers_smith(graph): #just goes for diagonal then insert then delete o2 = graph.s2[c-1]+o2 pointer = (r-1,c-1) elif p>=2: - o1 = "-"+o1 + o1 = f"-{o1}" o2 = graph.s2[c-1]+o2 pointer = (r,c-1) elif p==1: o1 = graph.s1[r-1]+o1 - o2 = "-"+o2 + o2 = f"-{o2}" pointer = (r-1,c) else: print("Done") diff --git a/bioinformatics/data_processors/parse_hmm.py b/bioinformatics/data_processors/parse_hmm.py index 0931212..29e45b8 100644 --- a/bioinformatics/data_processors/parse_hmm.py +++ b/bioinformatics/data_processors/parse_hmm.py @@ -19,5 +19,4 @@ def parse_hmm(text): states = parse_row(text[4]) transitions = TransitionMatrix(states, parse_table(states, states, text[6:6+len(states)+1])) emissions = EmissionMatrix(states, symbols, parse_table(states, symbols, text[6+len(states)+2:])) - hmm = HMM(symbols, states, transitions, emissions) - return hmm + return HMM(symbols, states, transitions, emissions) diff --git a/bioinformatics/rosalind/rosalind.py b/bioinformatics/rosalind/rosalind.py index 173b29b..31ad979 100644 --- a/bioinformatics/rosalind/rosalind.py +++ b/bioinformatics/rosalind/rosalind.py @@ -1,15 +1,13 @@ def rosalind_solve(input_file="rosalind/io/i.txt", output_file="rosalind/io/o.txt", func=(lambda x: x)): - f = open(input_file, "r") - i = f.readlines() - i = [x.strip() for x in i] if type(i) == list else i - f.close() + with 
open(input_file, "r") as f: + i = f.readlines() + i = [x.strip() for x in i] if type(i) == list else i o = func(i) - if output_file == None: + if output_file is None: return o - f = open(output_file, "w") - o2 = "\n".join(list(map(lambda x: str(x), o))) if type(o) == list else str(o) - f.write(o2) - f.close() + with open(output_file, "w") as f: + o2 = "\n".join(list(map(lambda x: str(x), o))) if type(o) == list else str(o) + f.write(o2) return o diff --git a/bioinformatics/rosalind/solutions/q7b.py b/bioinformatics/rosalind/solutions/q7b.py index 7403aea..3e0ddef 100644 --- a/bioinformatics/rosalind/solutions/q7b.py +++ b/bioinformatics/rosalind/solutions/q7b.py @@ -4,14 +4,11 @@ def q7b(): i = "src/rosalind/io/i.txt" o = "src/rosalind/io/o.txt" - f = open(i,"r") - n = int(f.readline()) - j = f.readline().strip() - mat = [] - for _ in range(n): - mat.append(list(map(int,f.readline().strip().split()))) - d_mat = DistanceMatrix(mat,list(map(str,range(n)))) - f2 = open(o,"w+") - f2.writelines(str(limb_length_n(j,d_mat))) - f.close() + with open(i,"r") as f: + n = int(f.readline()) + j = f.readline().strip() + mat = [list(map(int,f.readline().strip().split())) for _ in range(n)] + d_mat = DistanceMatrix(mat,list(map(str,range(n)))) + f2 = open(o,"w+") + f2.writelines(str(limb_length_n(j,d_mat))) f2.close() diff --git a/bioinformatics/rosalind/solutions/q7c.py b/bioinformatics/rosalind/solutions/q7c.py index 24b2d2c..3f46923 100644 --- a/bioinformatics/rosalind/solutions/q7c.py +++ b/bioinformatics/rosalind/solutions/q7c.py @@ -4,13 +4,10 @@ def q7c(): i = "src/rosalind/io/i.txt" o = "src/rosalind/io/o.txt" - f = open(i,"r") - n = int(f.readline()) - mat = [] - for _ in range(n): - mat.append(list(map(int,f.readline().strip().split()))) - d_mat = DistanceMatrix(mat,list(map(str,range(n)))) - f2 = open(o,"w+") - f2.writelines(str(additive_phylogeny(d_mat))) - f.close() + with open(i,"r") as f: + n = int(f.readline()) + mat = [list(map(int,f.readline().strip().split())) 
for _ in range(n)] + d_mat = DistanceMatrix(mat,list(map(str,range(n)))) + f2 = open(o,"w+") + f2.writelines(str(additive_phylogeny(d_mat))) f2.close() \ No newline at end of file diff --git a/bioinformatics/rosalind/solutions/q7d.py b/bioinformatics/rosalind/solutions/q7d.py index b443932..e1c007a 100644 --- a/bioinformatics/rosalind/solutions/q7d.py +++ b/bioinformatics/rosalind/solutions/q7d.py @@ -5,13 +5,10 @@ def q7d(): i = "src/rosalind/io/i.txt" o = "src/rosalind/io/o.txt" - f = open(i,"r") - n = int(f.readline()) - mat = [] - for _ in range(n): - mat.append(list(map(int,f.readline().strip().split()))) - d_mat = DistanceMatrix(mat,list(map(str,range(n))),True) - f2 = open(o,"w+") - f2.writelines(str(upgma(d_mat))) - f.close() + with open(i,"r") as f: + n = int(f.readline()) + mat = [list(map(int,f.readline().strip().split())) for _ in range(n)] + d_mat = DistanceMatrix(mat,list(map(str,range(n))),True) + f2 = open(o,"w+") + f2.writelines(str(upgma(d_mat))) f2.close() \ No newline at end of file diff --git a/bioinformatics/rosalind/solutions/q7e.py b/bioinformatics/rosalind/solutions/q7e.py index 32bba7b..db27bd1 100644 --- a/bioinformatics/rosalind/solutions/q7e.py +++ b/bioinformatics/rosalind/solutions/q7e.py @@ -5,13 +5,10 @@ def q7e(): i = "src/rosalind/io/i.txt" o = "src/rosalind/io/o.txt" - f = open(i,"r") - n = int(f.readline()) - mat = [] - for _ in range(n): - mat.append(list(map(int,f.readline().strip().split()))) - d_mat = DistanceMatrix(mat,list(map(str,range(n)))) - f2 = open(o,"w+") - f2.writelines(str(neighbour_joining(d_mat))) - f.close() + with open(i,"r") as f: + n = int(f.readline()) + mat = [list(map(int,f.readline().strip().split())) for _ in range(n)] + d_mat = DistanceMatrix(mat,list(map(str,range(n)))) + f2 = open(o,"w+") + f2.writelines(str(neighbour_joining(d_mat))) f2.close() \ No newline at end of file diff --git a/bioinformatics/rosalind/solutions/q7f.py b/bioinformatics/rosalind/solutions/q7f.py index 522c0f9..81b3b80 100644 
--- a/bioinformatics/rosalind/solutions/q7f.py +++ b/bioinformatics/rosalind/solutions/q7f.py @@ -4,13 +4,12 @@ def q7f(): i = "bioinformatics/rosalind/io/i7f.txt" o = "bioinformatics/rosalind/io/o7f.txt" - f = open(i,"r") - n = int(f.readline()) - txt = f.readlines() - tree = to_tree(string_to_adj_list(txt)) - f2 = open(o,"w+") - m = len(tree.nodes[tree.leaves()[0]]) - output = small_parsimony(tree,m) - f2.write(f"{output.edgecost()}\n{str(output).strip()}") - f.close() + with open(i,"r") as f: + n = int(f.readline()) + txt = f.readlines() + tree = to_tree(string_to_adj_list(txt)) + f2 = open(o,"w+") + m = len(tree.nodes[tree.leaves()[0]]) + output = small_parsimony(tree,m) + f2.write(f"{output.edgecost()}\n{str(output).strip()}") f2.close() \ No newline at end of file diff --git a/bioinformatics/rosalind/solutions/q9c.py b/bioinformatics/rosalind/solutions/q9c.py index 80a160a..10772da 100644 --- a/bioinformatics/rosalind/solutions/q9c.py +++ b/bioinformatics/rosalind/solutions/q9c.py @@ -9,8 +9,7 @@ def suffix_trie_rosalind(text): trie = create_suffix_trie(text) output = [] for frm in trie.edges: - for edge in trie.edges[frm]: - output.append(edge[1]) + output.extend(edge[1] for edge in trie.edges[frm]) return '\n'.join(output) diff --git a/bioinformatics/tests/test_actions_working.py b/bioinformatics/tests/test_actions_working.py index 85fa787..4da8c89 100644 --- a/bioinformatics/tests/test_actions_working.py +++ b/bioinformatics/tests/test_actions_working.py @@ -2,4 +2,4 @@ import pytest def test_actions_working(): - assert True \ No newline at end of file + pass \ No newline at end of file diff --git a/bioinformatics/tests/test_binary_tree.py b/bioinformatics/tests/test_binary_tree.py index c089dda..2a0353d 100644 --- a/bioinformatics/tests/test_binary_tree.py +++ b/bioinformatics/tests/test_binary_tree.py @@ -2,9 +2,8 @@ def test_binary_tree(): - f = open('bioinformatics/tests/data/ros_p_adj_list.txt','r') - raw_data = f.readlines() - f.close() + with 
open('bioinformatics/tests/data/ros_p_adj_list.txt','r') as f: + raw_data = f.readlines() tree = to_tree(string_to_adj_list(raw_data)) assert len(tree.leaves())==4 assert len(tree.nodes)==7 diff --git a/bioinformatics/tests/test_parsimony_preprocessors.py b/bioinformatics/tests/test_parsimony_preprocessors.py index d3b9087..d8c59cb 100644 --- a/bioinformatics/tests/test_parsimony_preprocessors.py +++ b/bioinformatics/tests/test_parsimony_preprocessors.py @@ -2,9 +2,8 @@ def test_working_translation_for_rosalind_adj(): - f = open('bioinformatics/tests/data/ros_p_adj_list.txt','r') - raw_data = f.readlines() - f.close() + with open('bioinformatics/tests/data/ros_p_adj_list.txt','r') as f: + raw_data = f.readlines() tree = to_tree(string_to_adj_list(raw_data)) assert len(tree.leaves())==4 assert len(tree.nodes)==7