-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain_old.py
More file actions
122 lines (103 loc) · 3.65 KB
/
main_old.py
File metadata and controls
122 lines (103 loc) · 3.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
from gensim.models import *
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import orth
def initialize(dim_D, dim_W):
    """Create the starting matrices for training.

    Args:
        dim_D: Side length of the square dictionary matrix.
        dim_W: Side length of the square weight matrix.

    Returns:
        A ``(dictionary, W)`` tuple: ``dictionary`` is the ``dim_D x dim_D``
        identity matrix and ``W`` is a ``dim_W x dim_W`` matrix of zeros.
    """
    # Identity dictionary: ones on the diagonal, zeros elsewhere.
    dictionary = np.eye(dim_D)
    # Weights start at zero (not randomly initialised).
    W = np.zeros((dim_W, dim_W))
    return dictionary, W
def normalize(X):
    """Standardize *X* along axis 0 to zero mean and unit standard deviation.

    Args:
        X: Array-like of numbers; statistics are computed along axis 0
            (per column for a 2-D input).

    Returns:
        A new array ``(X - mean) / std``. Columns whose standard deviation
        is zero (constant columns) come out as all zeros instead of NaN/inf.
    """
    X = np.asarray(X)
    mean_X = np.mean(X, axis=0)
    std_X = np.std(X, axis=0)
    # A constant column has std == 0; dividing by it would yield NaN (0/0).
    # Dividing by 1 instead leaves the already-centred zeros untouched.
    safe_std = np.where(std_X == 0, 1.0, std_X)
    return (X - mean_X) / safe_std
def propagate(w, X, Z):
    """Evaluate the squared-error cost of the linear map and its gradient.

    Args:
        w: Weight matrix applied as ``X . w``.
        X: Input matrix.
        Z: Target matrix with the same shape as ``X . w``.

    Returns:
        A ``(dw, cost)`` tuple where ``cost = sum((X.w - Z) ** 2)`` and
        ``dw = 2 * X^T . (X.w - Z)`` is the gradient of that cost w.r.t. *w*.
    """
    # Prediction error; shared by the cost and the gradient.
    residual = np.dot(X, w) - Z
    loss = np.sum(residual ** 2)
    gradient = 2 * np.dot(X.T, residual)
    # Sanity checks: the gradient must match w, the cost must be a scalar.
    assert gradient.shape == w.shape
    loss = np.squeeze(loss)
    assert loss.shape == ()
    return gradient, loss
def optimize(w, X, Z, num_iterations, learning_rate, print_cost=False):
    """Fit *w* by plain gradient descent on the cost given by ``propagate``.

    Args:
        w: Initial weight matrix; it is not modified in place.
        X: Input matrix passed through to ``propagate``.
        Z: Target matrix passed through to ``propagate``.
        num_iterations: Number of gradient steps to take.
        learning_rate: Step size multiplying the gradient.
        print_cost: When true, print the cost every 100 iterations.

    Returns:
        A ``(params, costs)`` tuple. ``params`` is ``{'w': ..., 'dw': ...}``
        holding the final weights and the last gradient (``None`` when no
        iteration ran). ``costs`` lists the cost recorded every 100
        iterations, starting with iteration 0.
    """
    costs = []
    # Defined up front so that zero iterations returns cleanly instead of
    # failing on an unbound name when the result dict is built.
    dw = None
    for i in range(num_iterations):
        dw, cost = propagate(w, X, Z)
        # Rebinding (rather than ``-=``) leaves the caller's array untouched.
        w = w - learning_rate * dw
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print('Cost after iteration %i: %f' % (i, cost))
    params = {'w': w,
              'dw': dw}
    return params, costs
def predict(w, X):
    """Apply the learned linear map to *X*.

    Args:
        w: Weight matrix; the shape check below requires the product to keep
            the shape of *X*.
        X: Input matrix with at least two dimensions.

    Returns:
        ``X . w``, asserted to have the same ``(rows, columns)`` as *X*.
    """
    expected_shape = (X.shape[0], X.shape[1])
    projected = np.dot(X, w)
    # The mapping must preserve the number of rows and columns of X.
    assert projected.shape == expected_shape
    return projected
def _keyed_vectors(model):
    """Return the word-vector lookup of *model* (its ``.wv`` if present, else the model itself)."""
    return getattr(model, 'wv', model)


def _stack_vectors(vectors, words):
    """Look up every word in *vectors* and stack the rows into one float64 matrix."""
    # One vstack over all rows instead of growing the matrix row by row.
    # float64 matches the dtype the training matrices had when they were
    # built on top of a float64 zero row.
    return np.vstack([vectors[word] for word in words]).astype(np.float64)


def train(model_source, model_target, vocab_array, num_iterations=2000, learning_rate=0.05, print_cost=False):
    """Learn a linear map from source-language to target-language embeddings.

    The map is fitted by gradient descent on the word pairs in
    *vocab_array*, then evaluated on those same pairs: a pair counts as
    correct when the nearest target word to the mapped source vector is the
    paired target word. The resulting accuracy is printed.

    Args:
        model_source: Source embeddings; either an object indexable by word,
            or one exposing such an object as ``.wv``.
        model_target: Target embeddings in the same form; its vector lookup
            must also provide ``similar_by_vector``.
        vocab_array: Array of word pairs; column 0 holds the source word and
            column 1 the target word. Must have a ``shape`` attribute.
        num_iterations: Number of gradient-descent steps.
        learning_rate: Gradient-descent step size.
        print_cost: Forwarded to ``optimize`` to print the cost periodically.

    Returns:
        Dict with ``'costs'`` (costs recorded by ``optimize``), ``'w'`` (the
        learned matrix) and ``'learning_rate'``.

    Raises:
        ValueError: If *vocab_array* is empty, or the source and target
            embeddings have different dimensionality.
    """
    m = vocab_array.shape[0]
    if m == 0:
        # Otherwise the accuracy computation below divides by zero.
        raise ValueError('vocab_array must contain at least one word pair')

    # Accept both full models (vectors under ``.wv``) and bare vector stores.
    source_vectors = _keyed_vectors(model_source)
    target_vectors = _keyed_vectors(model_target)

    vec_EN = _stack_vectors(source_vectors, (pair[0] for pair in vocab_array))
    vec_IT = _stack_vectors(target_vectors, (pair[1] for pair in vocab_array))

    # Take the embedding size from the data instead of hard-coding it.
    dim = vec_EN.shape[1]
    if vec_IT.shape[1] != dim:
        raise ValueError(
            'source and target embeddings must have the same dimensionality, '
            'got {} and {}'.format(dim, vec_IT.shape[1])
        )

    # Only the weight matrix is needed; the dictionary matrix is unused.
    _, w = initialize(m, dim)

    params, costs = optimize(w, vec_EN, vec_IT, num_iterations, learning_rate, print_cost)
    w = params['w']
    Z_prediction = predict(w, vec_EN)

    true_word = 0
    for vec_prediction, pair in zip(Z_prediction, vocab_array):
        # Nearest target-language word to the mapped source vector.
        nearest = target_vectors.similar_by_vector(vec_prediction, topn=1, restrict_vocab=None)
        if nearest[0][0] == pair[1]:
            true_word += 1
    print('Train accuracy: {}%'.format(true_word / m * 100))

    d = {'costs': costs,
         'w': w,
         'learning_rate': learning_rate}
    return d
if __name__ == '__main__':
    # Load the pre-trained embeddings (text word2vec format) for both languages.
    model_source = KeyedVectors.load_word2vec_format('model_EN/v7_EN_nor.vec', binary=False)
    print('英文模型加载完毕!')
    # The target files are the Spanish (ES) ones, so the message says Spanish.
    model_target = KeyedVectors.load_word2vec_format('model_ES/v7_ES_nor.vec', binary=False)
    print('西班牙文模型加载完毕!')

    # Bilingual word list; columns 0 and 1 are used as (source, target) words.
    vocab = np.load('vocab/vocabEN-ES.npy')
    # Train on the first pairs only. Slicing replaces the row-by-row stacking
    # and simply uses fewer pairs when the vocabulary is shorter.
    num_train_pairs = 100
    vocab_train = vocab[:num_train_pairs, :2]
    print(vocab_train)

    # Train one model per learning rate so their cost curves can be compared.
    learning_rates = [0.001, 0.0001]
    models = {}
    for rate in learning_rates:
        print('learn_rate is :' + str(rate))
        models[str(rate)] = train(model_source, model_target, vocab_train,
                                  num_iterations=1000, learning_rate=rate, print_cost=False)
        print('\n' + '---------------------------------------------------------------------' + '\n')

    # Plot every cost curve on the same axes, labelled by learning rate.
    for rate in learning_rates:
        result = models[str(rate)]
        plt.plot(np.squeeze(result["costs"]), label=str(result["learning_rate"]))

    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')

    legend = plt.legend(loc='upper center', shadow=True)
    frame = legend.get_frame()
    frame.set_facecolor('0.90')
    plt.show()