-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path1003.py
More file actions
71 lines (50 loc) · 1.44 KB
/
1003.py
File metadata and controls
71 lines (50 loc) · 1.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Follows a random search idea from:
# http://kvfrans.com/simple-algoritms-for-solving-cartpole/
# import gym
import gym.wrappers
import numpy as np
import time
#
def float_formatter(x):
    """Render a float with an explicit sign and six decimal places."""
    return "%+.6f" % x


# Route every float NumPy prints through the fixed-width formatter above so
# that printed observations and parameters line up between time steps.
np.set_printoptions(formatter={'float_kind': float_formatter})
#
# https://github.com/openai/gym/wiki/MountainCar-v0
#
# Create the MountainCar-v0 environment and wrap it in a Monitor in a single
# expression; the output directory and force=True are passed to the Monitor.
env = gym.wrappers.Monitor(
    gym.make('MountainCar-v0'),
    'tmp/mountaincar-experiment-1',
    force=True,
)

# Number of episodes to run.
EP_MAX = 1000
# Step budget constant; NOTE(review): not referenced by the visible episode loop.
T_MAX = 100

# Best episode return seen so far; None until the first episode has finished.
r_best = None
# Three-element parameter vector (bias plus two observation weights), zeroed.
params_best = np.zeros(3)
#
def add_bias(w):
    """Return a 1-D array consisting of a leading 1 followed by the values of *w*.

    The leading 1 acts as a constant bias feature, so a dot product with a
    parameter vector uses that vector's first element as the bias weight.

    Args:
        w: Array-like of numbers. Because ``np.append`` is called without an
            axis, multi-dimensional input is flattened before the 1 is
            prepended.

    Returns:
        numpy.ndarray of length ``1 + size(w)``.
    """
    return np.append([1], w)
# Run EP_MAX episodes with a linear policy over [1, observation] and remember
# the best episode return together with the parameters that produced it.
try:
    for ep in range(EP_MAX):
        # noinspection PyRedeclaration
        observation = env.reset()
        # use only velocity: the bias weight and the first observation weight
        # are zero, so only the last observation component drives the action
        params = np.array([0, 0, 1000])
        # random with bias (alternative parameter choice, currently disabled)
        # params = (np.random.random(3) - 0.5) * 10
        print("ep: {}".format(ep))
        print("params: {}".format(params))
        t = 0  # steps taken in this episode
        r = 0  # reward accumulated in this episode
        while True:
            t += 1
            env.render()
            # Linear score of the bias-augmented observation.
            k = np.dot(add_bias(observation), params)
            # Truncate the score to an int and clamp it to the 0..2 range
            # before handing it to env.step.
            action = min(max(int(k), 0), 2)
            print("time: {:3d}, observation: {}, action: {}".format(t, observation, action))
            #
            observation, reward, done, info = env.step(action)
            r += reward
            if done:
                print("Episode finished after {} timesteps".format(t))
                # Pause between episodes.
                time.sleep(1)
                break
        if r_best is None or r > r_best:
            print("Good! Episode got more reward ({}) than previous one ({})".format(r, r_best))
            r_best = r
            # Keep the parameters of the best episode alongside its return;
            # previously params_best was initialised but never updated.
            params_best = params.copy()
finally:
    # Close the environment explicitly, even when an episode raises or the
    # run is interrupted, instead of leaving cleanup to interpreter exit.
    env.close()