|
18 | 18 |
|
19 | 19 | import numpy as np |
20 | 20 | from jinja2 import Template |
| 21 | +import biotite.sequence.align as align, phylo |
| 22 | +import biotite.sequence.phylo as phylo |
| 23 | +from biotite.sequence import NucleotideSequence |
| 24 | +import networkx as nx |
| 25 | +import matplotlib.pyplot as plt |
21 | 26 |
|
22 | 27 | from clint.textui import colored, indent, puts_err |
23 | 28 | from docopt import docopt |
@@ -52,56 +57,71 @@ def first(s): |
52 | 57 | """ |
53 | 58 | Generate an aligned fasta from a VCF file. |
54 | 59 | """ |
55 | | - gt_set = np.chararray((0,len(v.samples))) |
56 | 60 | gt_set = [] |
57 | 61 | for line in variant_set: |
58 | 62 | if line.is_snp: |
59 | 63 | gt_set.append(firstv(line.gt_bases)) |
60 | 64 | if len(gt_set) == 0: |
61 | 65 | exit(puts_err("No genotypes")) |
62 | 66 | gt_set = np.vstack(gt_set) |
63 | | - seqs = list(zip(v.samples, np.transpose(gt_set))) |
| 67 | + seqs = [''.join(gt_set[:, i]) for i in range(gt_set.shape[1])] |
| 68 | + |
64 | 69 | if args["fasta"]: |
65 | | - for sample, seq in seqs: |
66 | | - print((">" + sample)) |
67 | | - print((''.join(seq))) |
| 70 | + for sample, seq in list(zip(v.samples, seqs)): |
| 71 | + print(">" + sample) |
| 72 | + print(seq) |
68 | 73 |
|
69 | 74 | elif args["tree"]: |
70 | 75 | """ |
71 | 76 | Generate a phylogenetic tree using an aligned fasta with muscle. |
72 | 77 | """ |
73 | 78 |
|
74 | | - ######## |
75 | | - # Consider biotite to replace deprecated --maketree function in muscle |
76 | | - ######## |
77 | | - |
78 | | - # Check for muscle dependency |
79 | | - check_program_exists("muscle") |
80 | | - fasta = "" |
81 | 79 | with indent(4): |
82 | 80 | puts_err(colored.blue("\nGenerating Fasta\n")) |
83 | | - for sample, seq in seqs: |
84 | | - fasta += ">" + sample + "\n" + ''.join(seq) + "\n" |
85 | | - tree_type = "upgma" # default is upgma |
| 81 | + trace = align.Alignment.trace_from_strings(seqs) |
| 82 | + aligned_seqs = align.Alignment([NucleotideSequence(seq) for seq in seqs], trace) |
| 83 | + distances = 1 - align.get_pairwise_sequence_identity(aligned_seqs, mode="all") |
| 84 | + |
86 | 85 | if args["nj"]: |
87 | | - tree_type = "neighborjoining" |
88 | | - with indent(4): |
89 | | - puts_err(colored.blue("\nGenerating " + tree_type + " Tree\n")) |
90 | | - comm = ["muscle", "-maketree", "-in", "-", "-cluster", tree_type] |
91 | | - tree, err = Popen(comm, stdin=PIPE, stdout=PIPE).communicate(input=fasta.encode()) |
92 | | - |
| 86 | + tree = phylo.neighbor_joining(distances) |
| 87 | + else: |
| 88 | + tree = phylo.upgma(distances) |
| 89 | + |
93 | 90 | # output tree |
94 | | - print(tree.decode("utf-8")) |
| 91 | + print(tree.to_newick(labels=v.samples)) |
95 | 92 |
|
96 | 93 | if args["--plot"]: |
| 94 | + # graph = tree.as_graph().to_undirected() |
| 95 | + # fig = plt.figure(figsize=(8.0, 8.0)) |
| 96 | + # ax = fig.gca() |
| 97 | + # ax.axis("off") |
| 98 | + # # Calculate position of nodes in the plot |
| 99 | + # pos = nx.kamada_kawai_layout(graph) |
| 100 | + # # Assign the gene names to the nodes that represent a reference index |
| 101 | + # node_labels = {i: name for i, name in enumerate(v.samples)} |
| 102 | + # nx.draw_networkx_edges(graph, pos, ax=ax) |
| 103 | + # nx.draw_networkx_labels( |
| 104 | + # graph, |
| 105 | + # pos, |
| 106 | + # ax=ax, |
| 107 | + # labels=node_labels, |
| 108 | + # font_size=7, |
| 109 | + # # Draw a white background behind the labeled nodes |
| 110 | + # # for better readability |
| 111 | + # bbox=dict(pad=0, color="white"), |
| 112 | + # ) |
| 113 | + # fig.tight_layout() |
| 114 | + |
| 115 | + # plt.show() |
| 116 | + |
97 | 117 | prefix = os.path.dirname(os.path.abspath(sys.modules['vcfkit'].__file__)) + "/static" |
98 | 118 | template = open(prefix + "/tree.html",'r').read() |
99 | 119 | tree_template = Template(template) |
100 | 120 | html_out = tempfile.NamedTemporaryFile(suffix=".html", delete=False) |
101 | 121 | with html_out as f: |
102 | | - tree = tree.replace("\n", "") |
| 122 | + tree = tree.to_newick(labels=v.samples) |
103 | 123 | sample_len = len(v.samples) |
104 | | - f.write(tree_template.render(**locals())) |
| 124 | + f.write(tree_template.render(tree=tree, prefix=prefix, sample_len=sample_len).encode()) |
105 | 125 | webbrowser.open("file://" + html_out.name) |
106 | 126 |
|
107 | 127 | if __name__ == '__main__': |
|
0 commit comments