from numpy import *
from numpy.random import *
from numpy.linalg import norm  # used by rbf_basis; not provided by the star imports above


class LogisticClassifier(object):
"""
    Multiclass logistic regression with L2 regularization, trained by
    batch gradient descent with optional momentum.
"""
def __init__(self, basis=None):
"""
        Instantiate a logistic regression model. Options for the basis are
        'poly', 'rbf', 'sigmoid', and 'rectifier'; anything else (including
        the default None) falls back to the identity ('linear') basis.
"""
        self.W = array([])
        self.A = None  # the mixing matrix for basis mapping
        self.basis = basis
if basis == 'poly':
self.basisfunc = self.poly_basis
elif basis == 'rbf':
self.basisfunc = self.rbf_basis
elif basis == 'sigmoid':
self.basisfunc = self.sigmoid_basis
elif basis == 'rectifier':
self.basisfunc = self.rectifier_basis
else:
self.basisfunc = self.identity
def identity(self, x):
#identity basis function + a bias
return hstack((x,1))
    def poly_basis(self, x):
        # polynomial basis: concatenate the powers 1..degree of the mixed input
        degree = 2
        # first mix the components of x in a higher dimension
        xn = dot(self.A, x)
        return self.identity(hstack(tuple(xn**i for i in range(1, degree + 1))))
    def rbf_basis(self, x):
        # here the rows of the mixing matrix serve as RBF centroids
        return self.identity(hstack(tuple(exp(-norm(x - mu)) for mu in self.A)))
def sigmoid_basis(self, x):
#just like a neural network layer.
xn = dot(self.A, x)
return self.identity((1+exp(-xn))**-1)
def rectifier_basis(self, x):
        # rectified linear (ReLU) units, as in modern neural networks
xn = dot(self.A, x)
return self.identity(maximum(xn, 0))
def basismap(self, X):
        # if X is an observation matrix (examples by dimensions),
        # return each row mapped to a higher-dimensional space
new_dimensions = self.basisfunc(X[0,:]).shape[0]
Xn = zeros((X.shape[0], new_dimensions))
for i,xi in enumerate(X):
Xn[i,:] = self.basisfunc(xi)
return Xn
def fit(self, X, Y, itrs=100, learn_rate=0.1, reg=0.1,
momentum=0.5, report_cost=False, proj_layer_size=10):
"""
Fit the model.
X - observation matrix (observations by dimensions)
Y - one-hot target matrix (examples by classes)
itrs - number of iterations to run
learn_rate - size of step to use for gradient descent
        reg - L2 regularization strength (weight-decay penalty)
        momentum - weight of the previous gradient in the update step
        report_cost - if True, record the loss at each iteration and return the list (expensive)
proj_layer_size - number of dimensions in the projection (mixing) layer. Higher -> more variance
"""
#first map to a new basis
if self.basis != 'rbf':
self.A = uniform(-1, 1, (proj_layer_size, X.shape[1]))
else:
#use the training examples as bases
self.A = X[permutation(X.shape[0])[:proj_layer_size],:]
Xn = self.basismap(X)
#set up weights
self.W = uniform(-0.1, 0.1, (Y.shape[1], Xn.shape[1]))
#optimize
costs = []
previous_grad = zeros(self.W.shape) #used in momentum
for i in range(itrs):
grad = self.grad(Xn, Y, reg) #compute gradient
            self.W = self.W - learn_rate*(grad + momentum*previous_grad)  # gradient step, blending in the previous gradient (simple momentum)
previous_grad = grad
if report_cost:
costs.append(self.loss(X,Y,reg))
return costs
    def softmax(self, Z):
        # row-wise softmax; subtract each row's max so exp() cannot overflow
        Z = Z - Z.max(axis=1).reshape((-1, 1))
        numerator = exp(Z)
        return numerator / sum(numerator, axis=1).reshape((-1, 1))
def predict(self, X):
"""
If the model has been trained, makes predictions on an observation matrix (observations by features)
"""
Xn = self.basismap(X)
return self.softmax(dot(Xn, self.W.T))
def grad(self, Xn, Y, reg):
"""
Returns the gradient of the loss function wrt the weights.
"""
#Xn should be the design matrix
Yh = self.softmax(dot(Xn, self.W.T))
return -dot(Y.T-Yh.T,Xn)/Xn.shape[0] + reg*self.W
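    # Derivation sketch for grad() above: with
    #   L(W) = -(1/N) * sum_n sum_k Y[n,k] * log(Yh[n,k]) + (reg/2) * ||W||_F^2
    # and Yh = softmax(Xn . W^T), the gradient works out to
    #   dL/dW = -(Y - Yh)^T . Xn / N + reg * W,
    # which is exactly the expression returned.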
    def loss(self, X, Y, reg):
        # X is the raw data matrix; predict() applies the basis map.
        # Cross-entropy plus (not minus) an L2 penalty, matching grad().
        Yh = self.predict(X)
        return -mean(sum(Y*log(Yh), axis=1)) + 0.5*reg*sum(self.W**2)
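

if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): the toy data,
    # hyperparameters, and one-hot encoding below are illustrative
    # assumptions, not values prescribed by the class.
    seed(0)
    n = 100  # examples per class
    # two 2-D Gaussian blobs centered at (-1, -1) and (+1, +1)
    X = vstack((randn(n, 2) - 1, randn(n, 2) + 1))
    labels = hstack((zeros(n, dtype=int), ones(n, dtype=int)))
    # one-hot target matrix (examples by classes), as fit() expects
    Y = zeros((X.shape[0], 2))
    Y[arange(X.shape[0]), labels] = 1
    model = LogisticClassifier(basis='rbf')
    costs = model.fit(X, Y, itrs=200, learn_rate=0.5, reg=0.01, report_cost=True)
    predictions = argmax(model.predict(X), axis=1)
    print('final loss: %.4f' % costs[-1])
    print('training accuracy: %.3f' % mean(predictions == labels))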