# logisticRegression.py (from the louiss007/MachineLearning repository on GitHub)
# Logistic regression helpers: sigmoid, training, accuracy evaluation and 2-D plotting.

from numpy import *
import matplotlib.pyplot as plt
import time

def sigmoid(inX):
    """Return the logistic function 1 / (1 + e**-inX), applied element-wise.

    Args:
        inX: A number, numpy array or numpy matrix.

    Returns:
        The logistic value(s) in [0, 1]; numpy inputs keep their shape.
    """
    # 1/(1+exp(-x)) is mathematically equal to 0.5*(1+tanh(x/2)). The tanh
    # form never evaluates exp() of a large positive number, so strongly
    # negative inputs no longer overflow and emit numpy warnings.
    return 0.5 * (1.0 + tanh(0.5 * inX))

def trainLogRegress(train_x, train_y, opts):
    """Fit logistic-regression weights with one of three gradient methods.

    Args:
        train_x: Sample matrix, one row per sample. It is multiplied by the
            weight column with ``*``, so it is presumably a ``numpy.matrix``
            (NOTE(review): confirm against the caller).
        train_y: Label column; element ``[i, 0]`` is the label of sample i.
        opts: Dict with the keys ``'alpha'`` (step size), ``'maxIter'``
            (number of passes over the data) and ``'optimizeType'`` (one of
            ``'gradDescent'``, ``'stocGradDescent'`` or
            ``'smoothStocGradDescent'``).

    Returns:
        The learned weights as a (numFeatures, 1) column.

    Raises:
        NameError: If ``opts['optimizeType']`` is not a supported method
            (raised on the first iteration).
    """
    # Wall-clock start, used only for the timing message at the end.
    startTime = time.time()

    numSamples, numFeatures = shape(train_x)
    alpha = opts['alpha']
    maxIter = opts['maxIter']
    optimizeType = opts['optimizeType']
    weights = ones((numFeatures, 1))  # every weight starts at 1

    for k in range(maxIter):
        if optimizeType == 'gradDescent':
            # Batch update: one step using the error of every sample.
            output = sigmoid(train_x * weights)
            error = train_y - output
            weights = weights + alpha * train_x.transpose() * error
        elif optimizeType == 'stocGradDescent':
            # One update per sample, visited in storage order.
            for i in range(numSamples):
                output = sigmoid(train_x[i, :] * weights)
                error = train_y[i, 0] - output
                weights = weights + alpha * train_x[i, :].transpose() * error
        elif optimizeType == 'smoothStocGradDescent':
            # Pool of sample rows not yet used in this pass. It must be a
            # real list so that entries can be deleted from it.
            dataIndex = list(range(numSamples))
            for i in range(numSamples):
                # Step size decays with the pass and step counters but
                # never drops below 0.01.
                alpha = 4.0 / (1.0 + k + i) + 0.01
                # randIndex is a position inside the shrinking pool; map it
                # to the actual sample row so each sample is used exactly
                # once per pass.
                randIndex = int(random.uniform(0, len(dataIndex)))
                sampleIndex = dataIndex[randIndex]
                output = sigmoid(train_x[sampleIndex, :] * weights)
                error = train_y[sampleIndex, 0] - output
                weights = weights + alpha * train_x[sampleIndex, :].transpose() * error
                del dataIndex[randIndex]
        else:
            raise NameError('Not support optimize method type!')

    # Parenthesised single argument prints the same text on Python 2 and 3.
    print('Congratulations, training complete! Took %s!' % (time.time() - startTime))
    return weights

def testLogRegress(weights, test_x, test_y):
    numSamples, numFeatures = shape(test_x)
    matchCount = 0
    for i in xrange(numSamples):
        predict = sigmoid(test_x[i, :]*weights)[0, 0] > 0.5
##        pre = sigmoid(test_x[i, :]*weights)
##        print pre
        if predict ==bool(test_y[i, 0]):
            matchCount += 1
    accuracy = float(matchCount)/numSamples
    return accuracy

def showLogRegress(weights, train_x, train_y):
    """Plot 2-D training samples and the learned decision boundary.

    Samples with label 0 are drawn as red circles and samples with label 1
    as blue circles, using columns 1 and 2 of ``train_x`` as coordinates.
    The green line is where ``w0 + w1*x1 + w2*x2 == 0``. Column 0 is never
    plotted; it is presumably a constant bias term (NOTE(review): confirm
    against the data loader).

    Args:
        weights: Three weights (w0, w1, w2), as a matrix or array.
        train_x: Sample matrix with exactly 3 columns.
        train_y: Label column; element ``[i, 0]`` is the label of sample i.

    Returns:
        1 if ``train_x`` does not have exactly 3 columns (nothing is drawn);
        otherwise None once the plot has been shown.
    """
    numSamples, numFeatures = shape(train_x)
    if numFeatures != 3:
        print("Sorry! I can not draw because the dimension of your data is not 2!")
        return 1

    # Plain arrays make boolean row selection and .min()/.max() behave the
    # same for matrix and ndarray inputs, and do not depend on whether the
    # names min/max refer to the builtins or to numpy functions.
    features = asarray(train_x)
    labels = asarray(train_y)[:, 0].astype(int)

    # Draw all samples, one plot call per class.
    for label, fmt in ((0, 'or'), (1, 'ob')):
        rows = labels == label
        plt.plot(features[rows, 1], features[rows, 2], fmt)

    # Draw the classification line between the smallest and largest x1.
    min_x = features[:, 1].min()
    max_x = features[:, 1].max()
    w0, w1, w2 = asarray(weights, dtype=float).ravel()
    y_min_x = (-w0 - w1 * min_x) / w2
    y_max_x = (-w0 - w1 * max_x) / w2
    plt.plot([min_x, max_x], [y_min_x, y_max_x], '-g')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()