-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathgdmatrix2tree.py
More file actions
executable file
·55 lines (47 loc) · 1.92 KB
/
gdmatrix2tree.py
File metadata and controls
executable file
·55 lines (47 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
"""
Creates NJ tree from a genetic distance matrix. Outputs ASCII format to
STDOUT and a nexus-formatted tree to output file. Note: distance matrix can
be created from `vcf` using `vcf_gdmatrix.py`.
"""
import sys
import argparse
import numpy as np
import Bio.Phylo
import Bio.Phylo.TreeConstruction
__author__ = 'Pim Bongaerts'
__copyright__ = 'Copyright (C) 2016 Pim Bongaerts'
__license__ = 'GPL'
def get_matrix_from_file(matrix_filename):
""" Get genetic distance matrix from file """
matrix_file = open(matrix_filename, 'r')
names = []
matrix = []
for index, line in enumerate(matrix_file, start=1):
if index > 1: # Skip header
cols = line.strip().split()
names.append(cols[0])
matrix.append([float(i) for i in cols[1:index]])
matrix_file.close()
distance_matrix = Bio.Phylo.TreeConstruction._DistanceMatrix(names=names,
matrix=matrix)
return distance_matrix
def main(matrix_filename, tree_output_filename):
# Read matrix and labels from file
distance_matrix = get_matrix_from_file(matrix_filename)
# Generate tree from distance matrix
constructor = Bio.Phylo.TreeConstruction.DistanceTreeConstructor()
tree = constructor.nj(distance_matrix)
tree.ladderize()
# Output to screen
Bio.Phylo.draw_ascii(tree)
Bio.Phylo.write(tree, tree_output_filename, 'nexus')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('matrix_filename', metavar='matrix_file',
help='text file (tsv or csv) with genetic distance \
matrix')
parser.add_argument('tree_output_filename', metavar='tree_output_file',
help='nexus file with output tree')
args = parser.parse_args()
main(args.matrix_filename, args.tree_output_filename)