-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathlearn.py
More file actions
executable file
·149 lines (113 loc) · 4.92 KB
/
learn.py
File metadata and controls
executable file
·149 lines (113 loc) · 4.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -*- coding: utf-8 -*-
from game import FEATURE_SIZE, HISTORY_SIZE
from model import DualNetwork
import numpy as np
import tensorflow as tf
def average_gradients(tower_grads):
    """Average the gradients computed by several towers, variable by variable.

    Args:
        tower_grads: One entry per tower, each a sequence of
            ``(gradient, variable)`` pairs in the same variable order
            (``learn`` passes the results of ``opt.compute_gradients``).

    Returns:
        A list of ``(mean_gradient, variable)`` pairs. The variable of each
        pair is taken from the first tower.
    """
    averaged = []
    # zip(*...) regroups the data so each item holds every tower's pair for
    # one variable.
    for per_variable in zip(*tower_grads):
        # Give each gradient a leading "tower" axis, join along it, and take
        # the mean over that axis.
        stacked = tf.concat(
            [tf.expand_dims(tower_grad, 0) for tower_grad, _ in per_variable],
            0)
        mean_grad = tf.reduce_mean(stacked, 0)
        shared_var = per_variable[0][1]
        averaged.append((mean_grad, shared_var))
    return averaged
def learn(fp_, ckpt_path="", lr_=1e-4, use_gpu=True, gpu_cnt=1):
    """Train the dual (policy/value) network on the samples served by ``fp_``.

    Builds one model tower per device, averages the tower gradients and
    applies them with a momentum optimizer for a fixed number of epochs.
    After each epoch the policy accuracy and value error over the feed are
    printed and appended to ``train_log.txt``, and the variables are saved
    to ``ckpt/model``.

    Args:
        fp_: Feed picker. It must expose ``size``, a ``next_batch(n)`` method
            whose result is indexed as ``[0]`` features, ``[1]`` target
            probabilities and ``[2]`` results, and a ``_perm`` array that is
            shuffled before evaluation.
        ckpt_path: Passed unchanged to ``DualNetwork.create_sess``.
        lr_: Learning rate of the first epoch; it is halved at the start of
            every later epoch.
        use_gpu: Build the towers on ``/gpu:N`` when true, ``/cpu:N``
            otherwise.
        gpu_cnt: Number of towers (devices) to build and feed per step.

    Raises:
        ValueError: If ``fp_`` holds no samples.
    """
    device_name = "gpu" if use_gpu else "cpu"

    feed = fp_  # FeedPicker
    feed_cnt = feed.size
    # Fail fast: an empty feed would give batch_cnt == 0 and crash with a
    # ZeroDivisionError only after the whole graph and session were built.
    if feed_cnt <= 0:
        raise ValueError("fp_ must contain at least one training sample")

    with tf.get_default_graph().as_default(), tf.device("/cpu:0"):
        # placeholders: one feature/probability/result triple per tower
        f_list = []
        p_list = []
        r_list = []
        for gpu_idx in range(gpu_cnt):
            f_list.append(tf.placeholder(
                "float", shape=[None, HISTORY_SIZE, FEATURE_SIZE],
                name="feature_%d" % gpu_idx))
            p_list.append(tf.placeholder(
                "float", shape=[None, FEATURE_SIZE], name="prob_%d" % gpu_idx))
            r_list.append(tf.placeholder(
                "float", shape=[None], name="result_%d" % gpu_idx))
        lr = tf.placeholder(tf.float32, shape=[], name="learning_rate")

        # optimizer and network definition
        opt = tf.train.MomentumOptimizer(lr, 0.9)
        dn = DualNetwork()

        # compute and apply gradients
        tower_grads = []
        # Re-entering the current scope keeps the reuse flag set below local
        # to this block.
        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_idx in range(gpu_cnt):
                with tf.device("/%s:%d" % (device_name, gpu_idx)):
                    # NOTE(review): reuse is switched on before the first
                    # tower is built; this presumably relies on dn.model
                    # tolerating it - confirm against model.py.
                    tf.get_variable_scope().reuse_variables()
                    policy_, value_ = dn.model(
                        f_list[gpu_idx], temp=1.0, is_train=True)
                    # Clip so tf.log never sees a zero probability.
                    policy_ = tf.clip_by_value(policy_, 1e-6, 1)
                    # Cross entropy between target and predicted policy.
                    loss_p = -tf.reduce_mean(tf.reduce_sum(tf.multiply(
                        p_list[gpu_idx], tf.log(policy_)), 1))
                    # Squared error between predicted value and result.
                    loss_v = tf.reduce_mean(
                        tf.square(tf.subtract(value_, r_list[gpu_idx])))
                    if gpu_idx == 0:
                        vars_train = tf.get_collection("vars_train")
                    # L2 penalty over the "vars_train" collection.
                    loss_l2 = tf.add_n([tf.nn.l2_loss(v) for v in vars_train])
                    loss = loss_p + loss_v + 1e-4 * loss_l2
                    tower_grads.append(opt.compute_gradients(loss))
        train_op = opt.apply_gradients(average_gradients(tower_grads))

        # accuracy: a separate inference-mode model on device 0 that reuses
        # the variables
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            with tf.device("/%s:0" % device_name):
                f_acc = tf.placeholder(
                    "float", shape=[None, HISTORY_SIZE, FEATURE_SIZE],
                    name="feature_acc")
                p_acc = tf.placeholder(
                    "float", shape=[None, FEATURE_SIZE], name="prob_acc")
                r_acc = tf.placeholder(
                    "float", shape=[None], name="result_acc")
                p_, v_ = dn.model(f_acc, temp=1.0, is_train=False)
                # Policy is "correct" when its argmax matches the target's.
                prediction = tf.equal(tf.argmax(p_, 1), tf.argmax(p_acc, 1))
                accuracy_p = tf.reduce_mean(tf.cast(prediction, "float"))
                accuracy_v = tf.reduce_mean(tf.square(tf.subtract(v_, r_acc)))
                accuracy = (accuracy_p, accuracy_v)

        sess = dn.create_sess(ckpt_path)

    # training settings
    batch_cnt = min(100, feed_cnt)
    total_epochs = 4
    epoch_steps = feed_cnt // (batch_cnt * gpu_cnt) + 1
    learning_rate = lr_

    # training
    for epoch_idx in range(total_epochs):
        if epoch_idx > 0:
            learning_rate *= 0.5

        for _ in range(epoch_steps):
            feed_dict_ = {}
            feed_dict_[lr] = learning_rate
            # Every tower gets its own batch for this step.
            for gpu_idx in range(gpu_cnt):
                batch = feed.next_batch(batch_cnt)
                feed_dict_[f_list[gpu_idx]] = batch[0]
                feed_dict_[p_list[gpu_idx]] = batch[1]
                feed_dict_[r_list[gpu_idx]] = batch[2]
            sess.run(train_op, feed_dict=feed_dict_)

        # NOTE(review): evaluation, logging and checkpointing are assumed to
        # run once per epoch - confirm against the upstream layout.
        # calculate accuracy
        acc_batch_cnt = batch_cnt
        acc_steps = feed.size // acc_batch_cnt
        np.random.shuffle(feed._perm)
        acc_sum = [0.0, 0.0]
        for _ in range(acc_steps):
            acc_batch = feed.next_batch(acc_batch_cnt)
            accur = sess.run(
                accuracy, feed_dict={f_acc: acc_batch[0],
                                     p_acc: acc_batch[1],
                                     r_acc: acc_batch[2]})
            acc_sum[0] += accur[0]
            acc_sum[1] += accur[1]
        # Policy accuracy becomes a percentage; the value error is reported
        # as half of the mean squared error.
        acc_sum[0] *= 100.0 / acc_steps
        acc_sum[1] *= 0.5 / acc_steps
        print("train: accuracy=%3.1f[%%] mse=%.3f"
              % (acc_sum[0], acc_sum[1]))
        str_log = "%3.2f\t%.3f\t" % (acc_sum[0], acc_sum[1])

        # save log (the context manager closes the file even if write fails)
        with open("train_log.txt", "a") as log_file:
            log_file.write(str_log + "\n")

        # save ckpt file
        dn.save_vars(sess, "ckpt/model")