-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSMO.py
More file actions
109 lines (99 loc) · 3.31 KB
/
SMO.py
File metadata and controls
109 lines (99 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# coding=utf-8
import random
import numpy as np
def clip(alpha, L, H):
'''
修剪alpha的值到L和H之间.
'''
if alpha < L:
return L
elif alpha > H:
return H
else:
return alpha
def select_j(i, m):
'''
在m中随机选择除了i之外剩余的数
'''
l = list(range(m))
seq = l[: i] + l[i + 1:]
return random.choice(seq)
def get_w(alphas, dataset, labels):
''' 通过已知数据点和拉格朗日乘子获得分割超平面参数w
'''
alphas, dataset, labels = np.array(alphas), np.array(dataset), np.array(labels)
yx = labels.reshape(1, -1).T * np.array([1, 1]) * dataset
w = np.dot(yx.T, alphas)
return w.tolist()
def smo(dataset, labels, C, max_iter):
'''
:param dataset: 所有特征数据向量
:param labels: 所有的数据标签
:param C: 软间隔常数, 0 <= alpha_i <= C
:param max_iter: 外层循环最大迭代次数
'''
dataset = np.array(dataset)
m, n = dataset.shape
labels = np.array(labels)
# 初始化参数
alphas = np.zeros(m)
b = 0
it = 0
def f(x):
"SVM分类器函数 y = w^Tx + b"
# Kernel function vector.
x = np.matrix(x).T
data = np.matrix(dataset)
ks = data * x
# Predictive value.
wx = np.matrix(alphas * labels) * ks
fx = wx + b
return fx[0, 0]
while it < max_iter:
pair_changed = 0
for i in range(m):
a_i, x_i, y_i = alphas[i], dataset[i], labels[i]
fx_i = f(x_i)
E_i = fx_i - y_i
#这里不采用书上的选取alpha的方法,因为实际效果上随机选取表现得竟然更好
j = select_j(i, m)
a_j, x_j, y_j = alphas[j], dataset[j], labels[j]
fx_j = f(x_j)
E_j = fx_j - y_j
K_ii, K_jj, K_ij = np.dot(x_i, x_i), np.dot(x_j, x_j), np.dot(x_i, x_j)
eta = K_ii + K_jj - 2 * K_ij
if eta <= 0:
print('WARNING!!! eta <= 0')
continue
# 获取更新的alpha对
a_i_old, a_j_old = a_i, a_j
a_j_new = a_j_old + y_j * (E_i - E_j) / eta
# 对alpha进行修剪
if y_i != y_j:
L = max(0, a_j_old - a_i_old)
H = min(C, C + a_j_old - a_i_old)
else:
L = max(0, a_i_old + a_j_old - C)
H = min(C, a_j_old + a_i_old)
a_j_new = clip(a_j_new, L, H)
a_i_new = a_i_old + y_i * y_j * (a_j_old - a_j_new)
if abs(a_j_new - a_j_old) < 0.00001:
continue
alphas[i], alphas[j] = a_i_new, a_j_new
# 更新阈值b
b_i = -E_i - y_i * K_ii * (a_i_new - a_i_old) - y_j * K_ij * (a_j_new - a_j_old) + b
b_j = -E_j - y_i * K_ij * (a_i_new - a_i_old) - y_j * K_jj * (a_j_new - a_j_old) + b
if 0 < a_i_new < C:
b = b_i
elif 0 < a_j_new < C:
b = b_j
else:
b = (b_i + b_j) / 2
pair_changed += 1
print('INFO iteration:{} i:{} pair_changed:{}'.format(it, i, pair_changed))
if pair_changed == 0:
it += 1
else:
it = 0
print('iteration number: {}'.format(it))
return alphas, b