-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlogistic_regression_demo.py
More file actions
68 lines (59 loc) · 2.13 KB
/
logistic_regression_demo.py
File metadata and controls
68 lines (59 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from classification.function import *
import random
import numpy as np
def sigmoid(inX):
    """Logistic function 1 / (1 + e^(-x)); accepts scalars or NumPy arrays.

    Args:
        inX: scalar or array-like of raw scores.

    Returns:
        Value(s) in (0, 1), same shape as the input.
    """
    # np.exp (instead of the star-imported `exp`) is explicit about its
    # origin, supports array inputs, and matches the file's np.* usage.
    return 1.0 / (1 + np.exp(-inX))
def gradAscent(features, labels):
    """Train logistic-regression weights by full-batch gradient ascent.

    Args:
        features: (m, n) array-like of samples (one row per sample).
        labels: length-m array-like of 0/1 class labels.

    Returns:
        (n, 1) NumPy array of learned weights.
    """
    features = np.asarray(features, dtype=float)
    # Reshape labels to a column vector so `labels - h` broadcasts to
    # (m, 1) even when labels arrive as a flat 1-D sequence; a 1-D labels
    # array would otherwise broadcast against h's (m, 1) into (m, m).
    labels = np.reshape(np.asarray(labels, dtype=float), (-1, 1))
    m, n = features.shape
    alpha = 0.01        # learning rate
    maxCycles = 500     # fixed number of full-batch updates
    weights = np.ones((n, 1))
    for _ in range(maxCycles):
        # Logistic response, inlined so the block has no external helpers.
        h = 1.0 / (1 + np.exp(-np.dot(features, weights)))
        error = labels - h  # gradient direction of the log-likelihood
        weights = weights + alpha * np.dot(features.T, error)
    return weights
def stocGradAscent(features, labels):
    """Train logistic-regression weights by stochastic gradient ascent.

    Each pass visits every sample exactly once in random order, with a
    learning rate that decays over iterations but never reaches zero.

    Args:
        features: (m, n) array-like of samples (one row per sample).
        labels: length-m array-like of 0/1 class labels.

    Returns:
        (n, 1) NumPy array of learned weights (same shape as gradAscent).
    """
    features = np.asarray(features, dtype=float)
    labels = np.asarray(labels, dtype=float).ravel()
    numIters = 150
    m, n = features.shape
    weights = np.ones(n)
    for j in range(numIters):
        dataIndex = list(range(m))
        for i in range(m):
            # Decaying step size: shrinks with both the pass (j) and the
            # step within the pass (i); the +0.01 floor keeps it nonzero.
            alpha = 4 / (1.0 + j + i) + 0.01
            randPos = int(random.uniform(0, len(dataIndex)))
            # BUG FIX: the original indexed features[randPos] directly,
            # but randPos addresses the *remaining* index list, not the
            # data — so some samples were revisited and others never
            # used.  Look the sample up through dataIndex first.
            sampleIdx = dataIndex[randPos]
            h = 1.0 / (1 + np.exp(-np.dot(features[sampleIdx], weights)))
            error = labels[sampleIdx] - h
            weights = weights + alpha * error * features[sampleIdx]
            del dataIndex[randPos]  # sample without replacement this pass
    # Return a column vector to match gradAscent's output shape.
    return weights.reshape(n, 1)
def classify(testFeat, testLabels, weights):
    """Classify test samples with logistic weights and report accuracy.

    Prints each prediction next to the true label, then the overall
    error rate.

    Args:
        testFeat: (m, n) array-like of test samples.
        testLabels: length-m sequence of 0/1 true labels.
        weights: (n, 1) weight vector (as returned by the trainers).

    Returns:
        The error rate (fraction misclassified) as a float.  The original
        only printed it; returning it is backward-compatible for callers
        that ignored the previous None result.
    """
    # Logistic response, inlined so the block has no external helpers.
    scores = np.dot(np.asarray(testFeat, dtype=float), weights)
    values = 1.0 / (1 + np.exp(-scores))
    numSamples = len(testLabels)
    predictions = np.zeros((numSamples, 1))
    errorCount = 0.0
    for i in range(numSamples):
        # Decision threshold at probability 0.5.
        predictions[i] = 1 if values[i] > 0.5 else 0
        print("the classifier came back with: %d, the real answer is: %d" % (predictions[i], testLabels[i]))
        if predictions[i] != testLabels[i]:
            errorCount += 1.0
    errorRate = errorCount / float(numSamples)
    print("the total error rate is: %f" % errorRate)
    return errorRate
# --- Demo driver: hold-out evaluation of both trainers -------------------
# Load the sample set; file2matrix comes from the star import of
# classification.function (its exact schema is not visible from here).
filename = '../data/sample_binary_classification_data.txt'
labels, features = file2matrix(filename)
hoRatio = 0.20  # fraction of the data held out for testing
m, n = shape(features)
# First 20% of rows form the test split; the rest is used for training.
# NOTE(review): no shuffling before the split — this assumes the file's
# row order is not class-sorted; confirm against the data file.
testFeat = features[0:int(hoRatio*m)]
trainFeat = features[int(hoRatio*m):m]
testLabels = labels[0:int(hoRatio*m)]
trainLabels = labels[int(hoRatio*m):m]
w0 = gradAscent(trainFeat, trainLabels)       # full-batch gradient ascent
w1 = stocGradAscent(trainFeat, trainLabels)   # stochastic gradient ascent
print("classify with gradient ascent")
classify(testFeat, testLabels, w0)
print("\nclassify with stochastic gradient ascent")
classify(testFeat, testLabels, w1)