-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlogisticRegression.py
More file actions
80 lines (69 loc) · 2.81 KB
/
logisticRegression.py
File metadata and controls
80 lines (69 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from numpy import *
import matplotlib.pyplot as plt
import time
def sigmoid(inX):
    """Return the logistic function 1 / (1 + e**-x), evaluated element-wise.

    Accepts a scalar, an ndarray or a numpy.matrix; array-like inputs keep
    their shape (and a matrix input yields a matrix).

    The naive form ``1.0 / (1 + exp(-inX))`` overflows in ``exp`` for large
    negative inputs.  Here ``exp`` is only ever applied to ``-|x|``, whose
    result lies in (0, 1], so no overflow can occur.
    """
    decay = exp(-abs(inX))
    # x >= 0: 1 / (1 + e**-x);  x < 0: e**x / (1 + e**x) -- the same value,
    # written so that the exponent is never positive.
    numerator = where(inX >= 0, 1.0, decay)
    return numerator / (1.0 + decay)
def trainLogRegress(train_x, train_y, opts):
    """Fit logistic-regression weights with one of three update schemes.

    Every scheme starts from all-ones weights and repeatedly applies
    ``weights += alpha * x.T * (y - sigmoid(x * weights))``.

    Args:
        train_x: numpy.matrix of shape (numSamples, numFeatures).  ``*`` is
            used as the matrix product, so a plain ndarray will not work.
        train_y: numpy.matrix of shape (numSamples, 1) with the targets
            (presumably 0/1 class labels -- confirm against the caller).
        opts: dict with the keys
            'alpha'        -- step size; ignored by 'smoothStocGradDescent',
                              which uses its own decaying schedule,
            'maxIter'      -- number of passes over the training set,
            'optimizeType' -- 'gradDescent' (whole batch per step),
                              'stocGradDescent' (one sample per step, in
                              order) or 'smoothStocGradDescent' (one sample
                              per step, random order, decaying step size).

    Returns:
        numpy.matrix of shape (numFeatures, 1) holding the learned weights.

    Raises:
        NameError: if opts['optimizeType'] is not a supported scheme.  This
            is checked before training so it is reported even when
            maxIter is 0.
    """
    startTime = time.time()

    numSamples, numFeatures = shape(train_x)
    alpha = opts['alpha']
    maxIter = opts['maxIter']
    optimizeType = opts['optimizeType']
    if optimizeType not in ('gradDescent', 'stocGradDescent',
                            'smoothStocGradDescent'):
        raise NameError('Not support optimize method type!')

    # Start as a matrix so the return type is the same for every maxIter.
    weights = asmatrix(ones((numFeatures, 1)))

    for k in range(maxIter):
        if optimizeType == 'gradDescent':
            output = sigmoid(train_x * weights)
            error = train_y - output
            weights = weights + alpha * train_x.transpose() * error
        elif optimizeType == 'stocGradDescent':
            for i in range(numSamples):
                output = sigmoid(train_x[i, :] * weights)
                error = train_y[i, 0] - output
                weights = weights + alpha * train_x[i, :].transpose() * error
        else:  # 'smoothStocGradDescent'
            # Rows not yet used in this pass; a list so entries can be removed.
            dataIndex = list(range(numSamples))
            for i in range(numSamples):
                # Step size decays with progress but never drops below 0.01.
                alpha = 4.0 / (1.0 + k + i) + 0.01
                randIndex = int(random.uniform(0, len(dataIndex)))
                # randIndex is a position inside dataIndex, not a row number:
                # translate it so each row is used exactly once per pass.
                sampleIndex = dataIndex.pop(randIndex)
                output = sigmoid(train_x[sampleIndex, :] * weights)
                error = train_y[sampleIndex, 0] - output
                weights = (weights
                           + alpha * train_x[sampleIndex, :].transpose() * error)

    print('Congratulations, training complete! Took %s!'
          % (time.time() - startTime))
    return weights
def testLogRegress(weights, test_x, test_y):
numSamples, numFeatures = shape(test_x)
matchCount = 0
for i in xrange(numSamples):
predict = sigmoid(test_x[i, :]*weights)[0, 0] > 0.5
## pre = sigmoid(test_x[i, :]*weights)
## print pre
if predict ==bool(test_y[i, 0]):
matchCount += 1
accuracy = float(matchCount)/numSamples
return accuracy
def showLogRegress(weights, train_x, train_y):
    """Plot the labelled samples and the fitted decision line with matplotlib.

    Only data with exactly three feature columns can be drawn: columns 1 and
    2 are used as the plot axes (column 0 is presumably a constant bias
    term -- confirm against the caller).  Samples labelled 0 are drawn as red
    circles and samples labelled 1 as blue circles; other labels are not
    drawn.  The green line is where
    ``weights[0] + weights[1] * x1 + weights[2] * x2 == 0``.

    Args:
        weights: three weights, e.g. the (3, 1) matrix returned by training.
        train_x: 2-D array/matrix of shape (numSamples, 3).
        train_y: 2-D array/matrix whose first column holds the labels.

    Returns:
        1 if train_x does not have three columns (nothing is drawn),
        otherwise None after the plot window has been shown.
    """
    numSamples, numFeatures = shape(train_x)
    if numFeatures != 3:
        print("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    features = asarray(train_x)
    labels = asarray(train_y)[:numSamples, 0].astype(int)

    # Draw each class with a single plot call instead of one call per sample.
    for label, marker in ((0, 'or'), (1, 'ob')):
        members = labels == label
        if members.any():
            plt.plot(features[members, 1], features[members, 2], marker)

    # Use the array methods rather than the bare min/max names, which may or
    # may not be shadowed by the file's "from numpy import *".
    min_x = features[:, 1].min()
    max_x = features[:, 1].max()

    # Plain floats, so the arithmetic below does not rely on converting
    # one-element arrays to scalars.
    w0, w1, w2 = [float(w) for w in asarray(weights).ravel()[:3]]
    y_min_x = (-w0 - w1 * min_x) / w2
    y_max_x = (-w0 - w1 * max_x) / w2
    plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()