-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path20080703b.py
More file actions
124 lines (117 loc) · 4.59 KB
/
20080703b.py
File metadata and controls
124 lines (117 loc) · 4.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""Evaluate partitions of perturbed distance matrices of a set of trees.
Evaluate a bipartition function on perturbed distance matrices
of a set of trees.
The perturbed distance matrices will be symmetric and non-negative.
To generate the perturbed distance matrix,
distance_i will be multiplied by exp(X_i)
where X_i is a normally distributed random variable
with mean zero and standard deviation equal to the perturbation strength.
The exact bipartition criterion is a matrix function by Eric Stone.
"""
from StringIO import StringIO
import random
import math
from SnippetUtil import HandlingError
import FelTree
import NewickIO
import Clustering
import iterutils
from Form import RadioItem
import Form
import FormOut
#FIXME use const data
def get_form():
"""
@return: the body of a form
"""
# define the default tree lines
default_tree_lines = [
'(a:1.062, c:0.190, (d:1.080, b:2.30):2.112);',
'((d:0.083, b:0.614):0.150, e:0.581, (c:1.290, a:0.359):1.070);',
'((b:1.749, d:0.523):0.107, e:1.703, (a:0.746, c:0.070):4.025);']
# define the form objects
form_objects = [
Form.MultiLine('trees', 'newick trees (one tree per line)',
'\n'.join(default_tree_lines)),
Form.Float('strength', 'perturbation strength',
0.1, low_inclusive=0),
Form.RadioGroup('options', 'bipartition function', [
RadioItem('exact', 'exact criterion', True),
RadioItem('sign', 'spectral sign approximation'),
RadioItem('threshold', 'spectral threshold approximation'),
RadioItem('nj', 'neighbor joining criterion'),
RadioItem('random', 'random bipartition')])]
return form_objects
def get_form_out():
return FormOut.Report()
def get_response_content(fs):
# get the newick trees.
trees = []
for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
# Parse each tree and make sure
# that it conforms to various requirements.
tree = NewickIO.parse(tree_string, FelTree.NewickTree)
tip_names = [tip.get_name() for tip in tree.gen_tips()]
if len(tip_names) < 4:
msg_a = 'expected at least four tips but found '
msg_b = str(len(tip_names))
raise HandlingError(msg_a + msg_b)
if any(name is None for name in tip_names):
raise HandlingError('each terminal node must be labeled')
if len(set(tip_names)) != len(tip_names):
raise HandlingError('each terminal node label must be unique')
trees.append(tree)
# read the criterion string, creating the splitter object
if fs.exact:
splitter = Clustering.StoneExactDMS()
elif fs.sign:
splitter = Clustering.StoneSpectralSignDMS()
elif fs.threshold:
splitter = Clustering.StoneSpectralThresholdDMS()
elif fs.nj:
splitter = Clustering.NeighborJoiningDMS()
elif fs.random:
splitter = Clustering.RandomDMS()
# assert that the computation is fast
complexity = 0
for tree in trees:
n = len(list(tree.gen_tips()))
complexity += n * splitter.get_complexity(n)
if complexity > 1000000:
raise HandlingError('this computation would take too long')
# evaluate the bipartition of each tree based on its distance matrix
informative_split_count = 0
degenerate_split_count = 0
invalid_split_count = 0
for tree in trees:
tips = list(tree.gen_tips())
n = len(tips)
D = tree.get_distance_matrix()
if fs.strength:
P = [row[:] for row in D]
for i in range(n):
for j in range(i):
x = random.normalvariate(0, fs.strength)
new_distance = D[i][j] * math.exp(x)
P[i][j] = new_distance
P[j][i] = new_distance
else:
P = D
index_selection = splitter.get_selection(P)
tip_selection = [tips[i] for i in index_selection]
n_selection = len(tip_selection)
n_complement = n - n_selection
if min(n_selection, n_complement) < 2:
degenerate_split_count += 1
else:
if tree.get_split_branch(tip_selection):
informative_split_count += 1
else:
invalid_split_count += 1
# define the response
out = StringIO()
print >> out, informative_split_count, 'informative splits'
print >> out, degenerate_split_count, 'degenerate splits'
print >> out, invalid_split_count, 'invalid splits'
# return the response
return out.getvalue()