-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathboost.py
More file actions
64 lines (50 loc) · 2.01 KB
/
boost.py
File metadata and controls
64 lines (50 loc) · 2.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import weak_classifier
from math import log, exp
def error(data, weights, guesses):
    """Return the total weight of the samples that were guessed wrong.

    A sample at position ``i`` counts as wrong when ``data[i].label``
    differs from ``guesses[i]``; its contribution is ``weights[i]``.
    The result is always a float (0.0 when nothing is misclassified).
    """
    misclassified_weights = (
        weights[idx]
        for idx, sample in enumerate(data)
        if sample.label != guesses[idx]
    )
    # Start from 0.0 so the result is a float even for an empty selection.
    return sum(misclassified_weights, 0.0)
def calc_alpha(data, weights, guesses):
    """Return the coefficient for one weak hypothesis, clamped to [-100, 100].

    The coefficient is ``log((1 - er) / er) / 2`` where ``er`` is the
    weighted error ``error(data, weights, guesses)``.

    Args:
        data: samples; each must expose a ``label`` attribute.
        weights: per-sample weights, indexed like ``data``.
        guesses: per-sample predictions, indexed like ``data``.

    Returns:
        100 when the error is (almost) zero, -100 when the error is one or
        more, otherwise the formula above limited to the same bounds.
    """
    er = error(data, weights, guesses)
    if er < 2e-300:
        # The error is zero or vanishingly small: skip the division by it
        # and report the upper cap directly.
        return 100
    odds = (1 - er) / er
    if odds <= 0:
        # er >= 1 (every weighted sample was guessed wrong, possibly plus
        # rounding noise). log() is undefined for non-positive values and
        # would raise ValueError, so mirror the upper cap instead.
        return -100
    return max(min(log(odds) / 2, 100), -100)
def train_classifier(data, max_iterations):
    """Train a boosted classifier over ``max_iterations`` rounds.

    Every sample starts with the same weight ``1 / len(data)``. Each round:

    1. trains a weak hypothesis with ``weak_classifier.train_classifier``
       on the current weights,
    2. computes its coefficient with ``calc_alpha`` and prints it,
    3. multiplies each weight by ``exp(-alpha * label * guess)`` and divides
       the results by their sum.

    Returns a ``BoostClassifier`` holding the per-round hypotheses and
    coefficients. Both arguments must be truthy (checked with ``assert``).
    """
    assert data
    assert max_iterations

    sample_count = len(data)
    weights = [1 / float(sample_count)] * sample_count

    # One base hypothesis and one coefficient are collected per round.
    hypotheses = []
    coefficients = []
    for _ in range(max_iterations):
        hypothesis = weak_classifier.train_classifier(data, weights)
        guesses = list(hypothesis.classify(data))
        coefficient = calc_alpha(data, weights, guesses)
        print("alpha = %s" % coefficient)

        # Unnormalized new weights; their sum is the normalization constant.
        scaled = [
            weights[idx] * exp(- coefficient * sample.label * guesses[idx])
            for idx, sample in enumerate(data)
        ]
        normalizer = sum(scaled, 0.0)
        weights = [value / normalizer for value in scaled]

        hypotheses.append(hypothesis)
        coefficients.append(coefficient)

    return BoostClassifier(hypotheses, coefficients, max_iterations)
def sign(x):
    """Return 1 when ``x`` is greater than or equal to zero, otherwise -1."""
    return 1 if x >= 0 else -1
class BoostClassifier(object):
    """Weighted vote over a fixed list of base hypotheses.

    ``base_h[t]`` is the hypothesis from round ``t`` and ``alpha[t]`` the
    coefficient applied to its guesses; both lists hold ``iterations``
    entries.
    """

    def __init__(self, base_h, alpha, iterations):
        """Store the hypotheses and coefficients after checking their lengths."""
        assert len(base_h) == iterations
        assert len(alpha) == iterations
        self.base_h = base_h
        self.alpha = alpha
        self.iterations = iterations

    def __repr__(self):
        """Return a constructor-like description of this classifier."""
        fields = (self.base_h, self.alpha, self.iterations)
        return "BoostClassifier(base_h=%s, alpha=%s, iterations=%s)" % fields

    def classify(self, data):
        """Yield one combined guess (1 or -1) per item of ``data``.

        Each base hypothesis classifies all of ``data`` first; the guess for
        an item is then the sign of the coefficient-weighted sum of the
        per-round guesses for that item.
        """
        rounds = range(self.iterations)
        # per_round[t][idx] is hypothesis t's guess for data[idx].
        per_round = [list(self.base_h[t].classify(data)) for t in rounds]
        for idx in range(len(data)):
            score = sum(self.alpha[t] * per_round[t][idx] for t in rounds)
            yield sign(score)