-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathPlay.py
More file actions
87 lines (77 loc) · 3.2 KB
/
Play.py
File metadata and controls
87 lines (77 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
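This script plays one game of Pong with a pretrained network and saves the
rendered frames as a gif (gifs/gameplay.gif).
It does not use multiprocessing.
It needs a pretrained network: a "pong_ppo_<score>.h5" file in the working directory.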
"""
import gym
from PongBot import create_model
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
#import tensorflow as tf
#import tensorflow.keras.backend as K
#num_cores = 1
#num_CPU = 1
#num_GPU = 1
#config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,
# inter_op_parallelism_threads=num_cores,
# allow_soft_placement=True,
# device_count = {'CPU' : num_CPU,'GPU' : num_GPU})
#session = tf.Session(config=config)
#K.set_session(session)
# Locate a pretrained checkpoint. Checkpoints are named "pong_ppo_<score>.h5";
# candidates are probed from score 21 down to -21 and the first file that
# exists in the working directory is used, so the highest-numbered one wins.
# NOTE(review): <score> is presumably the game score the saved network
# reached - confirm against the training script.
for score in range(21, -22, -1):
    checkpoint = "pong_ppo_" + str(score) + ".h5"
    if os.path.isfile(checkpoint):
        # create_model comes from PongBot and receives the checkpoint path.
        ppo_net = create_model(checkpoint)
        break
else:
    # Reached only when the loop never hit `break`, i.e. no checkpoint file
    # exists. FileNotFoundError is still an Exception, so existing handlers
    # that catch Exception keep working.
    raise FileNotFoundError('You need a pretrained net to do this')
def _build_animation(frames):
    """Create a borderless figure sized to the frames and an animation over them.

    The figure is sized so that, at 72 dpi, it has exactly the pixel
    dimensions of the first frame. Returns the ``(figure, animation)`` pair.
    """
    height, width = frames[0].shape[0], frames[0].shape[1]
    fig = plt.figure(figsize=(width / 72.0, height / 72.0), dpi=72)
    patch = plt.imshow(frames[0])
    plt.axis('off')
    # Let the image fill the whole canvas (no margins around the frame).
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)

    def animate(i):
        patch.set_data(frames[i])

    # interval is the delay between frames in milliseconds.
    anim = animation.FuncAnimation(fig, animate, frames=len(frames), interval=33)
    return fig, anim


def display_frames_as_gif(frames):
    """Save a list of frames as an animated gif at ``gifs/gameplay.gif``.

    Despite its name, this function does not open a viewer; it only writes
    the file. The ``gifs`` directory is created if it does not exist.

    The animation is first saved with matplotlib's default writer. If that
    fails for any reason, the figure is rebuilt and the save is retried with
    the Pillow writer at 40 fps.
    NOTE(review): the usual cause of the first failure is presumably a
    missing movie writer backend - confirm.

    Args:
        frames: non-empty sequence of image arrays; each must have a
            ``shape`` with at least two dimensions and be accepted by
            ``plt.imshow``.

    Raises:
        IndexError: if ``frames`` is empty.
    """
    # Without the directory both save attempts would fail after a full game.
    os.makedirs('gifs', exist_ok=True)
    fig, anim = _build_animation(frames)
    try:
        anim.save('gifs/gameplay.gif')
    except Exception:
        # Start again from a fresh figure so the retry is not affected by a
        # half-finished first attempt.
        plt.close(fig)
        fig, anim = _build_animation(frames)
        anim.save('gifs/gameplay.gif', writer=animation.PillowWriter(fps=40))
    finally:
        # Release the figure; otherwise it stays registered with pyplot.
        plt.close(fig)
# Placeholder inputs handed to the network at prediction time: the model
# requires these extra inputs but does not use them here, so zeros suffice.
DUMMY_ACTION = np.zeros((1, 3))
DUMMY_VALUE = np.zeros((1, 1))
def process_frame(frame):
    """Crop, rescale and downsample a raw game frame.

    Steps, in order:
      1. keep rows 34..193 and channel 1 of the frame,
      2. map each value v to (v - 72) * -1 / 164,
      3. keep every second row and every second column.

    For a (210, 160, 3) input the result is an (80, 80) float array.

    NOTE(review): the subtraction is carried out in the frame's own dtype;
    if frames are uint8 (presumably the case for Gym Atari frames), values
    below 72 would wrap around instead of going negative - confirm that no
    such values occur in the cropped play field.
    """
    play_field = frame[34:194, :, 1]
    rescaled = (play_field - 72) * -1. / 164
    return rescaled[::2, ::2]
def _sample_action(probabilities):
    """Draw an environment action id from the network's probability vector.

    A single uniform number in [0, 1) is compared against the running sum of
    ``probabilities``; the first index whose cumulative probability reaches
    the draw is chosen. Network outputs are indexed from 0 while the action
    ids sent to the environment start at 1, hence the ``+ 1``. If the draw
    exceeds the total (e.g. the probabilities sum to slightly less than 1
    because of rounding), action 1 is returned as a fallback.
    """
    draw = np.random.random()
    cumulative = 0
    for index, probability in enumerate(probabilities):
        cumulative += probability
        if draw <= cumulative:
            return index + 1
    return 1


# Play one full episode of Pong with the pretrained network, recording every
# raw frame, then save the recording as a gif.
# This relies on env.reset() returning just the observation and env.step()
# returning a 4-tuple (observation, reward, done, info).
frames = []
env = gym.make('Pong-v0')
try:
    frames.append(env.reset())
    observation = process_frame(frames[-1])
    prev_observation = observation  # no earlier frame yet: start with two identical ones
    done = False
    while not done:  # one iteration per environment step, until the episode ends
        env.render()
        # The network input is the two most recent processed frames stacked
        # along a trailing axis: shape (80, 80, 2).
        state = np.stack((prev_observation, observation), axis=2)
        # DUMMY_VALUE / DUMMY_ACTION are required by the network but never
        # used for prediction; this is a hack.
        predicted = ppo_net.predict(
            [state.reshape(1, 80, 80, 2), DUMMY_VALUE, DUMMY_ACTION]
        )[0][0]
        # Sample from the predicted distribution rather than acting greedily.
        # NOTE: the greedy alternative would be np.argmax(predicted) + 1.
        action = _sample_action(predicted)
        prev_observation = observation
        # Advance the game by one step; see the OpenAI Gym docs for details.
        observation, reward, done, info = env.step(action)
        frames.append(observation)  # keep the raw frame for the gif
        observation = process_frame(observation)
finally:
    # Always release the emulator and the render window, even on error.
    env.close()
display_frames_as_gif(frames)