From 2b1604488182f548b68bb0c2a25b0bb178d596c9 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:03:34 +0100 Subject: [PATCH 01/18] Update FlappyAgent.py --- RandomBird/FlappyAgent.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/RandomBird/FlappyAgent.py b/RandomBird/FlappyAgent.py index 9f3ec84..00fb802 100644 --- a/RandomBird/FlappyAgent.py +++ b/RandomBird/FlappyAgent.py @@ -1,9 +1,17 @@ import numpy as np +Q=np.load("trained_Q.npy") def FlappyPolicy(state, screen): + + # Using "state" + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + action=None - if(np.random.randint(0,2)<1): - action=119 + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action = 119 + return action - From 59e43c74c6617942fbb25f004e6933ac0bb3e887 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:04:05 +0100 Subject: [PATCH 02/18] Create training.py --- RandomBird/training.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 RandomBird/training.py diff --git a/RandomBird/training.py b/RandomBird/training.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/RandomBird/training.py @@ -0,0 +1 @@ + From b97aafbcbca9ae926d54a030ad6baa10b5da7dde Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:04:37 +0100 Subject: [PATCH 03/18] Update training.py --- RandomBird/training.py | 82 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/RandomBird/training.py b/RandomBird/training.py index 8b13789..56dd84c 100644 --- a/RandomBird/training.py +++ b/RandomBird/training.py @@ -1 +1,83 @@ + +from ple.games.flappybird import FlappyBird +from ple import PLE 
+import numpy as np +import matplotlib.pyplot as plt + +game = FlappyBird() +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) + +p.init() +reward = 0.0 +nb_games = 1000 +cumulated = np.zeros((nb_games)) + +# parameter of modele +r_1 = 1 +r_2 = -100 +alpha = 0.04 + +x_wall = np.zeros((40)) +y_wall = np.zeros((40)) +v_wall = np.zeros((40)) +a_wall = np.zeros((40)) +#Q(y,x,v,a) ,a is set of action +Q = np.zeros((512,300, 21, 2)) +## fly if y < 273 +Q[255:511,:,:,0] = 0.1 +Q[0:254,:,:,1] = 0.1 +# between the pipe +Q[:,8,:,1] = 0.2 # in the middle: jump +Q[216:256,120:144,:,1] = 0.2 # jump if too low +Q[256:306,120:144,:,0] = 0.2 + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + screen = p.getScreenRGB() + #instead of using absolute position of pipe, use relative position + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + + #greedy policy + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action_value = 119 + else: action_value=None + if (i>1): + for j in range(37-1, 0, -1): + x_wall[j] = int(x_wall[j-1]) + y_wall[j] = int(y_wall[j-1]) + v_wall[j] = int(v_wall[j-1]) + a_wall[j] = int(a_wall[j-1]) + x_wall[0] = int(x) + y_wall[0] = int(y) + v_wall[0] = int(v) + a_wall[0] = int(action) + + #reward is +1 if bird fly by the pipe + reward = p.act(action_value) + my_reward=0 + if (reward==1): + my_reward = r_1 + cumulated[i] += 1 + for j in range(1, 40): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + + # bad result : -100 + if (reward<0): + my_reward = r_2 + if (x==20): + for j in range(0, 27): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + 
np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + else: + for j in range(0, 6): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + +np.save('trained_Q', Q) + + From bafe6b2439c106963c59fc6bf9c8e65679e3eecc Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:13:11 +0100 Subject: [PATCH 04/18] Create run.py --- yuanhaoran/run.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/run.py diff --git a/yuanhaoran/run.py b/yuanhaoran/run.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/run.py @@ -0,0 +1 @@ + From 1ec758ddae7e0a33d303be80d4e85e1b6e62cf8c Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:13:47 +0100 Subject: [PATCH 05/18] Update run.py --- yuanhaoran/run.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/yuanhaoran/run.py b/yuanhaoran/run.py index 8b13789..a08f27c 100644 --- a/yuanhaoran/run.py +++ b/yuanhaoran/run.py @@ -1 +1,30 @@ +# You're not allowed to change this file +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from FlappyAgent import FlappyPolicy + +game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True) +# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes. 
+ +p.init() +reward = 0.0 + +nb_games = 100 +cumulated = np.zeros((nb_games)) + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + screen = p.getScreenRGB() + action=FlappyPolicy(state, screen) ### Your job is to define this function. + + reward = p.act(action) + cumulated[i] = cumulated[i] + reward + +average_score = np.mean(cumulated) +max_score = np.max(cumulated) From 036236fad48465539325c6cdf2b9bd54ae952af1 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:14:02 +0100 Subject: [PATCH 06/18] Create training.py --- yuanhaoran/training.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/training.py diff --git a/yuanhaoran/training.py b/yuanhaoran/training.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/training.py @@ -0,0 +1 @@ + From a05fb5843e9b88de6ffedc591f00be54f65a42cf Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:14:12 +0100 Subject: [PATCH 07/18] Create FlappyAgent.py --- yuanhaoran/FlappyAgent.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/FlappyAgent.py diff --git a/yuanhaoran/FlappyAgent.py b/yuanhaoran/FlappyAgent.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/FlappyAgent.py @@ -0,0 +1 @@ + From 8e06886cc93d26360bbe734123dbf1f1b40f1343 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:14:35 +0100 Subject: [PATCH 08/18] Update FlappyAgent.py --- yuanhaoran/FlappyAgent.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/yuanhaoran/FlappyAgent.py b/yuanhaoran/FlappyAgent.py index 8b13789..3574457 100644 --- a/yuanhaoran/FlappyAgent.py +++ b/yuanhaoran/FlappyAgent.py @@ -1 +1,17 @@ +import numpy as np +Q=np.load("trained_Q.npy") + +def FlappyPolicy(state, 
screen): + + # Using "state" + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + + action=None + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action = 119 + + return action From bc6d2cf94274629df823b57c992728b8b84ad034 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:15:08 +0100 Subject: [PATCH 09/18] Update training.py --- RandomBird/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RandomBird/training.py b/RandomBird/training.py index 56dd84c..dc94b9f 100644 --- a/RandomBird/training.py +++ b/RandomBird/training.py @@ -10,7 +10,7 @@ p.init() reward = 0.0 -nb_games = 1000 +nb_games = 10000 cumulated = np.zeros((nb_games)) # parameter of modele From 1ffcd21d06ef4220ebaa8c810bf462ce773e0ce2 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:15:19 +0100 Subject: [PATCH 10/18] Update training.py --- yuanhaoran/training.py | 80 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/yuanhaoran/training.py b/yuanhaoran/training.py index 8b13789..eeec3f1 100644 --- a/yuanhaoran/training.py +++ b/yuanhaoran/training.py @@ -1 +1,81 @@ +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +import matplotlib.pyplot as plt + +game = FlappyBird() +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) + +p.init() +reward = 0.0 +nb_games = 1000 +cumulated = np.zeros((nb_games)) + +# parameter of modele +r_1 = 1 +r_2 = -100 +alpha = 0.04 + +x_wall = np.zeros((40)) +y_wall = np.zeros((40)) +v_wall = np.zeros((40)) +a_wall = np.zeros((40)) +#Q(y,x,v,a) ,a is set of action +Q = np.zeros((512,300, 21, 2)) +## fly if y < 273 +Q[255:511,:,:,0] = 0.1 +Q[0:254,:,:,1] = 0.1 +# 
between the pipe +Q[:,8,:,1] = 0.2 # in the middle: jump +Q[216:256,120:144,:,1] = 0.2 # jump if too low +Q[256:306,120:144,:,0] = 0.2 + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + screen = p.getScreenRGB() + #instead of using absolute position of pipe, use relative position + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + + #greedy policy + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action_value = 119 + else: action_value=None + if (i>1): + for j in range(37-1, 0, -1): + x_wall[j] = int(x_wall[j-1]) + y_wall[j] = int(y_wall[j-1]) + v_wall[j] = int(v_wall[j-1]) + a_wall[j] = int(a_wall[j-1]) + x_wall[0] = int(x) + y_wall[0] = int(y) + v_wall[0] = int(v) + a_wall[0] = int(action) + + #reward is +1 if bird fly by the pipe + reward = p.act(action_value) + my_reward=0 + if (reward==1): + my_reward = r_1 + cumulated[i] += 1 + for j in range(1, 40): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + + # bad result : -100 + if (reward<0): + my_reward = r_2 + if (x==20): + for j in range(0, 27): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + else: + for j in range(0, 6): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + +np.save('trained_Q', Q) + From b603d99785efe04af5f753cff70845d5dcfb5457 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:22:30 +0100 Subject: [PATCH 11/18] Create hu'hhuh --- RandomBird/hu'hhuh | 1 + 1 file changed, 1 
insertion(+) create mode 100644 RandomBird/hu'hhuh diff --git a/RandomBird/hu'hhuh b/RandomBird/hu'hhuh new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/RandomBird/hu'hhuh @@ -0,0 +1 @@ + From a7da7d52a87f40023f4f76eb4574b5e1e3b9df3c Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:22:45 +0100 Subject: [PATCH 12/18] Delete hu'hhuh --- RandomBird/hu'hhuh | 1 - 1 file changed, 1 deletion(-) delete mode 100644 RandomBird/hu'hhuh diff --git a/RandomBird/hu'hhuh b/RandomBird/hu'hhuh deleted file mode 100644 index 8b13789..0000000 --- a/RandomBird/hu'hhuh +++ /dev/null @@ -1 +0,0 @@ - From b2320b80af7781aca8aee4e8b6b661b45e6d4725 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:22:58 +0100 Subject: [PATCH 13/18] Delete FlappyAgent.py --- RandomBird/FlappyAgent.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 RandomBird/FlappyAgent.py diff --git a/RandomBird/FlappyAgent.py b/RandomBird/FlappyAgent.py deleted file mode 100644 index 00fb802..0000000 --- a/RandomBird/FlappyAgent.py +++ /dev/null @@ -1,17 +0,0 @@ -import numpy as np -Q=np.load("trained_Q.npy") - -def FlappyPolicy(state, screen): - - # Using "state" - y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) - x = int(state['next_pipe_dist_to_player']) - v = int(state['player_vel']) - - action=None - action = int(np.argmax(Q[y][x][v][:])) - if (action == 1): - action = 119 - - return action - From 6deb77c56530b12c0104ae02299ff01a67067d1d Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:23:05 +0100 Subject: [PATCH 14/18] Delete run.py --- RandomBird/run.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 RandomBird/run.py diff --git a/RandomBird/run.py 
b/RandomBird/run.py deleted file mode 100644 index 39b5801..0000000 --- a/RandomBird/run.py +++ /dev/null @@ -1,29 +0,0 @@ -# You're not allowed to change this file -from ple.games.flappybird import FlappyBird -from ple import PLE -import numpy as np -from FlappyAgent import FlappyPolicy - -game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. -p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True) -# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes. - -p.init() -reward = 0.0 - -nb_games = 100 -cumulated = np.zeros((nb_games)) - -for i in range(nb_games): - p.reset_game() - - while(not p.game_over()): - state = game.getGameState() - screen = p.getScreenRGB() - action=FlappyPolicy(state, screen) ### Your job is to define this function. 
- - reward = p.act(action) - cumulated[i] = cumulated[i] + reward - -average_score = np.mean(cumulated) -max_score = np.max(cumulated) From 2387f695919be1edf1099f0a1fdcefc0e0e378df Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:23:13 +0100 Subject: [PATCH 15/18] Delete training.py --- RandomBird/training.py | 83 ------------------------------------------ 1 file changed, 83 deletions(-) delete mode 100644 RandomBird/training.py diff --git a/RandomBird/training.py b/RandomBird/training.py deleted file mode 100644 index dc94b9f..0000000 --- a/RandomBird/training.py +++ /dev/null @@ -1,83 +0,0 @@ - - -from ple.games.flappybird import FlappyBird -from ple import PLE -import numpy as np -import matplotlib.pyplot as plt - -game = FlappyBird() -p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) - -p.init() -reward = 0.0 -nb_games = 10000 -cumulated = np.zeros((nb_games)) - -# parameter of modele -r_1 = 1 -r_2 = -100 -alpha = 0.04 - -x_wall = np.zeros((40)) -y_wall = np.zeros((40)) -v_wall = np.zeros((40)) -a_wall = np.zeros((40)) -#Q(y,x,v,a) ,a is set of action -Q = np.zeros((512,300, 21, 2)) -## fly if y < 273 -Q[255:511,:,:,0] = 0.1 -Q[0:254,:,:,1] = 0.1 -# between the pipe -Q[:,8,:,1] = 0.2 # in the middle: jump -Q[216:256,120:144,:,1] = 0.2 # jump if too low -Q[256:306,120:144,:,0] = 0.2 - -for i in range(nb_games): - p.reset_game() - - while(not p.game_over()): - state = game.getGameState() - screen = p.getScreenRGB() - #instead of using absolute position of pipe, use relative position - y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) - x = int(state['next_pipe_dist_to_player']) - v = int(state['player_vel']) - - #greedy policy - action = int(np.argmax(Q[y][x][v][:])) - if (action == 1): - action_value = 119 - else: action_value=None - if (i>1): - for j in range(37-1, 0, -1): - x_wall[j] = int(x_wall[j-1]) 
- y_wall[j] = int(y_wall[j-1]) - v_wall[j] = int(v_wall[j-1]) - a_wall[j] = int(a_wall[j-1]) - x_wall[0] = int(x) - y_wall[0] = int(y) - v_wall[0] = int(v) - a_wall[0] = int(action) - - #reward is +1 if bird fly by the pipe - reward = p.act(action_value) - my_reward=0 - if (reward==1): - my_reward = r_1 - cumulated[i] += 1 - for j in range(1, 40): - Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) - - # bad result : -100 - if (reward<0): - my_reward = r_2 - if (x==20): - for j in range(0, 27): - Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) - else: - for j in range(0, 6): - Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) - -np.save('trained_Q', Q) - - From d1363212ca58be8e28ad9117b06f1b70a64a2daf Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:23:42 +0100 Subject: [PATCH 16/18] Update FlappyAgent.py --- yuanhaoran/FlappyAgent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yuanhaoran/FlappyAgent.py b/yuanhaoran/FlappyAgent.py index 3574457..bc327e6 100644 --- a/yuanhaoran/FlappyAgent.py +++ b/yuanhaoran/FlappyAgent.py @@ -5,7 +5,7 @@ def FlappyPolicy(state, screen): # Using "state" - y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + y = int(256 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) x = int(state['next_pipe_dist_to_player']) v = int(state['player_vel']) From 0a93a61ae4800db10efd6afae47edb810d91d14e Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 
23:24:44 +0100 Subject: [PATCH 17/18] Create README.md --- yuanhaoran/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/README.md diff --git a/yuanhaoran/README.md b/yuanhaoran/README.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/README.md @@ -0,0 +1 @@ + From 27f72127d335a32c60a65fde9175b435d03c1c28 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:34:28 +0100 Subject: [PATCH 18/18] Update README.md --- yuanhaoran/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yuanhaoran/README.md b/yuanhaoran/README.md index 8b13789..7d70fd3 100644 --- a/yuanhaoran/README.md +++ b/yuanhaoran/README.md @@ -1 +1,3 @@ +This is an implementation of the game Flappy Bird trained with Q-learning. +First, install the PLE framework; then run training.py to produce the trained Q-table, and finally run run.py to play the game.