From 2b1604488182f548b68bb0c2a25b0bb178d596c9 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:03:34 +0100 Subject: [PATCH 01/18] Update FlappyAgent.py --- RandomBird/FlappyAgent.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/RandomBird/FlappyAgent.py b/RandomBird/FlappyAgent.py index 9f3ec84..00fb802 100644 --- a/RandomBird/FlappyAgent.py +++ b/RandomBird/FlappyAgent.py @@ -1,9 +1,17 @@ import numpy as np +Q=np.load("trained_Q.npy") def FlappyPolicy(state, screen): + + # Using "state" + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + action=None - if(np.random.randint(0,2)<1): - action=119 + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action = 119 + return action - From 59e43c74c6617942fbb25f004e6933ac0bb3e887 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:04:05 +0100 Subject: [PATCH 02/18] Create training.py --- RandomBird/training.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 RandomBird/training.py diff --git a/RandomBird/training.py b/RandomBird/training.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/RandomBird/training.py @@ -0,0 +1 @@ + From b97aafbcbca9ae926d54a030ad6baa10b5da7dde Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:04:37 +0100 Subject: [PATCH 03/18] Update training.py --- RandomBird/training.py | 82 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/RandomBird/training.py b/RandomBird/training.py index 8b13789..56dd84c 100644 --- a/RandomBird/training.py +++ b/RandomBird/training.py @@ -1 +1,83 @@ + +from ple.games.flappybird import FlappyBird +from ple import PLE 
+import numpy as np +import matplotlib.pyplot as plt + +game = FlappyBird() +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) + +p.init() +reward = 0.0 +nb_games = 1000 +cumulated = np.zeros((nb_games)) + +# parameter of modele +r_1 = 1 +r_2 = -100 +alpha = 0.04 + +x_wall = np.zeros((40)) +y_wall = np.zeros((40)) +v_wall = np.zeros((40)) +a_wall = np.zeros((40)) +#Q(y,x,v,a) ,a is set of action +Q = np.zeros((512,300, 21, 2)) +## fly if y < 273 +Q[255:511,:,:,0] = 0.1 +Q[0:254,:,:,1] = 0.1 +# between the pipe +Q[:,8,:,1] = 0.2 # in the middle: jump +Q[216:256,120:144,:,1] = 0.2 # jump if too low +Q[256:306,120:144,:,0] = 0.2 + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + screen = p.getScreenRGB() + #instead of using absolute position of pipe, use relative position + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + + #greedy policy + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action_value = 119 + else: action_value=None + if (i>1): + for j in range(37-1, 0, -1): + x_wall[j] = int(x_wall[j-1]) + y_wall[j] = int(y_wall[j-1]) + v_wall[j] = int(v_wall[j-1]) + a_wall[j] = int(a_wall[j-1]) + x_wall[0] = int(x) + y_wall[0] = int(y) + v_wall[0] = int(v) + a_wall[0] = int(action) + + #reward is +1 if bird fly by the pipe + reward = p.act(action_value) + my_reward=0 + if (reward==1): + my_reward = r_1 + cumulated[i] += 1 + for j in range(1, 40): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + + # bad result : -100 + if (reward<0): + my_reward = r_2 + if (x==20): + for j in range(0, 27): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + 
np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + else: + for j in range(0, 6): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + +np.save('trained_Q', Q) + + From bafe6b2439c106963c59fc6bf9c8e65679e3eecc Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:13:11 +0100 Subject: [PATCH 04/18] Create run.py --- yuanhaoran/run.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/run.py diff --git a/yuanhaoran/run.py b/yuanhaoran/run.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/run.py @@ -0,0 +1 @@ + From 1ec758ddae7e0a33d303be80d4e85e1b6e62cf8c Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:13:47 +0100 Subject: [PATCH 05/18] Update run.py --- yuanhaoran/run.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/yuanhaoran/run.py b/yuanhaoran/run.py index 8b13789..a08f27c 100644 --- a/yuanhaoran/run.py +++ b/yuanhaoran/run.py @@ -1 +1,30 @@ +# You're not allowed to change this file +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +from FlappyAgent import FlappyPolicy + +game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True) +# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes. 
+ +p.init() +reward = 0.0 + +nb_games = 100 +cumulated = np.zeros((nb_games)) + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + screen = p.getScreenRGB() + action=FlappyPolicy(state, screen) ### Your job is to define this function. + + reward = p.act(action) + cumulated[i] = cumulated[i] + reward + +average_score = np.mean(cumulated) +max_score = np.max(cumulated) From 036236fad48465539325c6cdf2b9bd54ae952af1 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:14:02 +0100 Subject: [PATCH 06/18] Create training.py --- yuanhaoran/training.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/training.py diff --git a/yuanhaoran/training.py b/yuanhaoran/training.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/training.py @@ -0,0 +1 @@ + From a05fb5843e9b88de6ffedc591f00be54f65a42cf Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:14:12 +0100 Subject: [PATCH 07/18] Create FlappyAgent.py --- yuanhaoran/FlappyAgent.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/FlappyAgent.py diff --git a/yuanhaoran/FlappyAgent.py b/yuanhaoran/FlappyAgent.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/FlappyAgent.py @@ -0,0 +1 @@ + From 8e06886cc93d26360bbe734123dbf1f1b40f1343 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:14:35 +0100 Subject: [PATCH 08/18] Update FlappyAgent.py --- yuanhaoran/FlappyAgent.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/yuanhaoran/FlappyAgent.py b/yuanhaoran/FlappyAgent.py index 8b13789..3574457 100644 --- a/yuanhaoran/FlappyAgent.py +++ b/yuanhaoran/FlappyAgent.py @@ -1 +1,17 @@ +import numpy as np +Q=np.load("trained_Q.npy") + +def FlappyPolicy(state, 
screen): + + # Using "state" + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + + action=None + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action = 119 + + return action From bc6d2cf94274629df823b57c992728b8b84ad034 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:15:08 +0100 Subject: [PATCH 09/18] Update training.py --- RandomBird/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RandomBird/training.py b/RandomBird/training.py index 56dd84c..dc94b9f 100644 --- a/RandomBird/training.py +++ b/RandomBird/training.py @@ -10,7 +10,7 @@ p.init() reward = 0.0 -nb_games = 1000 +nb_games = 10000 cumulated = np.zeros((nb_games)) # parameter of modele From 1ffcd21d06ef4220ebaa8c810bf462ce773e0ce2 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:15:19 +0100 Subject: [PATCH 10/18] Update training.py --- yuanhaoran/training.py | 80 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/yuanhaoran/training.py b/yuanhaoran/training.py index 8b13789..eeec3f1 100644 --- a/yuanhaoran/training.py +++ b/yuanhaoran/training.py @@ -1 +1,81 @@ +from ple.games.flappybird import FlappyBird +from ple import PLE +import numpy as np +import matplotlib.pyplot as plt + +game = FlappyBird() +p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) + +p.init() +reward = 0.0 +nb_games = 1000 +cumulated = np.zeros((nb_games)) + +# parameter of modele +r_1 = 1 +r_2 = -100 +alpha = 0.04 + +x_wall = np.zeros((40)) +y_wall = np.zeros((40)) +v_wall = np.zeros((40)) +a_wall = np.zeros((40)) +#Q(y,x,v,a) ,a is set of action +Q = np.zeros((512,300, 21, 2)) +## fly if y < 273 +Q[255:511,:,:,0] = 0.1 +Q[0:254,:,:,1] = 0.1 +# 
between the pipe +Q[:,8,:,1] = 0.2 # in the middle: jump +Q[216:256,120:144,:,1] = 0.2 # jump if too low +Q[256:306,120:144,:,0] = 0.2 + +for i in range(nb_games): + p.reset_game() + + while(not p.game_over()): + state = game.getGameState() + screen = p.getScreenRGB() + #instead of using absolute position of pipe, use relative position + y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + x = int(state['next_pipe_dist_to_player']) + v = int(state['player_vel']) + + #greedy policy + action = int(np.argmax(Q[y][x][v][:])) + if (action == 1): + action_value = 119 + else: action_value=None + if (i>1): + for j in range(37-1, 0, -1): + x_wall[j] = int(x_wall[j-1]) + y_wall[j] = int(y_wall[j-1]) + v_wall[j] = int(v_wall[j-1]) + a_wall[j] = int(a_wall[j-1]) + x_wall[0] = int(x) + y_wall[0] = int(y) + v_wall[0] = int(v) + a_wall[0] = int(action) + + #reward is +1 if bird fly by the pipe + reward = p.act(action_value) + my_reward=0 + if (reward==1): + my_reward = r_1 + cumulated[i] += 1 + for j in range(1, 40): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + + # bad result : -100 + if (reward<0): + my_reward = r_2 + if (x==20): + for j in range(0, 27): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + else: + for j in range(0, 6): + Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) + +np.save('trained_Q', Q) + From b603d99785efe04af5f753cff70845d5dcfb5457 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:22:30 +0100 Subject: [PATCH 11/18] Create hu'hhuh --- RandomBird/hu'hhuh | 1 + 1 file changed, 1 
insertion(+) create mode 100644 RandomBird/hu'hhuh diff --git a/RandomBird/hu'hhuh b/RandomBird/hu'hhuh new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/RandomBird/hu'hhuh @@ -0,0 +1 @@ + From a7da7d52a87f40023f4f76eb4574b5e1e3b9df3c Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:22:45 +0100 Subject: [PATCH 12/18] Delete hu'hhuh --- RandomBird/hu'hhuh | 1 - 1 file changed, 1 deletion(-) delete mode 100644 RandomBird/hu'hhuh diff --git a/RandomBird/hu'hhuh b/RandomBird/hu'hhuh deleted file mode 100644 index 8b13789..0000000 --- a/RandomBird/hu'hhuh +++ /dev/null @@ -1 +0,0 @@ - From b2320b80af7781aca8aee4e8b6b661b45e6d4725 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:22:58 +0100 Subject: [PATCH 13/18] Delete FlappyAgent.py --- RandomBird/FlappyAgent.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 RandomBird/FlappyAgent.py diff --git a/RandomBird/FlappyAgent.py b/RandomBird/FlappyAgent.py deleted file mode 100644 index 00fb802..0000000 --- a/RandomBird/FlappyAgent.py +++ /dev/null @@ -1,17 +0,0 @@ -import numpy as np -Q=np.load("trained_Q.npy") - -def FlappyPolicy(state, screen): - - # Using "state" - y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) - x = int(state['next_pipe_dist_to_player']) - v = int(state['player_vel']) - - action=None - action = int(np.argmax(Q[y][x][v][:])) - if (action == 1): - action = 119 - - return action - From 6deb77c56530b12c0104ae02299ff01a67067d1d Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:23:05 +0100 Subject: [PATCH 14/18] Delete run.py --- RandomBird/run.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 RandomBird/run.py diff --git a/RandomBird/run.py 
b/RandomBird/run.py deleted file mode 100644 index 39b5801..0000000 --- a/RandomBird/run.py +++ /dev/null @@ -1,29 +0,0 @@ -# You're not allowed to change this file -from ple.games.flappybird import FlappyBird -from ple import PLE -import numpy as np -from FlappyAgent import FlappyPolicy - -game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors. -p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True) -# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes. - -p.init() -reward = 0.0 - -nb_games = 100 -cumulated = np.zeros((nb_games)) - -for i in range(nb_games): - p.reset_game() - - while(not p.game_over()): - state = game.getGameState() - screen = p.getScreenRGB() - action=FlappyPolicy(state, screen) ### Your job is to define this function. 
- - reward = p.act(action) - cumulated[i] = cumulated[i] + reward - -average_score = np.mean(cumulated) -max_score = np.max(cumulated) From 2387f695919be1edf1099f0a1fdcefc0e0e378df Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:23:13 +0100 Subject: [PATCH 15/18] Delete training.py --- RandomBird/training.py | 83 ------------------------------------------ 1 file changed, 83 deletions(-) delete mode 100644 RandomBird/training.py diff --git a/RandomBird/training.py b/RandomBird/training.py deleted file mode 100644 index dc94b9f..0000000 --- a/RandomBird/training.py +++ /dev/null @@ -1,83 +0,0 @@ - - -from ple.games.flappybird import FlappyBird -from ple import PLE -import numpy as np -import matplotlib.pyplot as plt - -game = FlappyBird() -p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=False) - -p.init() -reward = 0.0 -nb_games = 10000 -cumulated = np.zeros((nb_games)) - -# parameter of modele -r_1 = 1 -r_2 = -100 -alpha = 0.04 - -x_wall = np.zeros((40)) -y_wall = np.zeros((40)) -v_wall = np.zeros((40)) -a_wall = np.zeros((40)) -#Q(y,x,v,a) ,a is set of action -Q = np.zeros((512,300, 21, 2)) -## fly if y < 273 -Q[255:511,:,:,0] = 0.1 -Q[0:254,:,:,1] = 0.1 -# between the pipe -Q[:,8,:,1] = 0.2 # in the middle: jump -Q[216:256,120:144,:,1] = 0.2 # jump if too low -Q[256:306,120:144,:,0] = 0.2 - -for i in range(nb_games): - p.reset_game() - - while(not p.game_over()): - state = game.getGameState() - screen = p.getScreenRGB() - #instead of using absolute position of pipe, use relative position - y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) - x = int(state['next_pipe_dist_to_player']) - v = int(state['player_vel']) - - #greedy policy - action = int(np.argmax(Q[y][x][v][:])) - if (action == 1): - action_value = 119 - else: action_value=None - if (i>1): - for j in range(37-1, 0, -1): - x_wall[j] = int(x_wall[j-1]) 
- y_wall[j] = int(y_wall[j-1]) - v_wall[j] = int(v_wall[j-1]) - a_wall[j] = int(a_wall[j-1]) - x_wall[0] = int(x) - y_wall[0] = int(y) - v_wall[0] = int(v) - a_wall[0] = int(action) - - #reward is +1 if bird fly by the pipe - reward = p.act(action_value) - my_reward=0 - if (reward==1): - my_reward = r_1 - cumulated[i] += 1 - for j in range(1, 40): - Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) - - # bad result : -100 - if (reward<0): - my_reward = r_2 - if (x==20): - for j in range(0, 27): - Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) - else: - for j in range(0, 6): - Q[int(y_wall[j]),int(x_wall[j]),int(v_wall[j]),int(a_wall[j])] += alpha * (my_reward + np.max(Q[int(y_wall[j-1]),int(x_wall[j-1]),int(v_wall[j-1]),int(a_wall[j-1])])) - -np.save('trained_Q', Q) - - From d1363212ca58be8e28ad9117b06f1b70a64a2daf Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:23:42 +0100 Subject: [PATCH 16/18] Update FlappyAgent.py --- yuanhaoran/FlappyAgent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yuanhaoran/FlappyAgent.py b/yuanhaoran/FlappyAgent.py index 3574457..bc327e6 100644 --- a/yuanhaoran/FlappyAgent.py +++ b/yuanhaoran/FlappyAgent.py @@ -5,7 +5,7 @@ def FlappyPolicy(state, screen): # Using "state" - y = int(288 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) + y = int(256 + (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) * 0.5 - state['player_y']) x = int(state['next_pipe_dist_to_player']) v = int(state['player_vel']) From 0a93a61ae4800db10efd6afae47edb810d91d14e Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 
23:24:44 +0100 Subject: [PATCH 17/18] Create README.md --- yuanhaoran/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 yuanhaoran/README.md diff --git a/yuanhaoran/README.md b/yuanhaoran/README.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/yuanhaoran/README.md @@ -0,0 +1 @@ + From 27f72127d335a32c60a65fde9175b435d03c1c28 Mon Sep 17 00:00:00 2001 From: yuanhaorannnnnn <37274406+yuanhaorannnnnn@users.noreply.github.com> Date: Sun, 11 Mar 2018 23:34:28 +0100 Subject: [PATCH 18/18] Update README.md --- yuanhaoran/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yuanhaoran/README.md b/yuanhaoran/README.md index 8b13789..7d70fd3 100644 --- a/yuanhaoran/README.md +++ b/yuanhaoran/README.md @@ -1 +1,3 @@ +This is an implementation of the game Flappy Bird trained with Q-learning. +First, install the PLE framework; then run training.py to produce the trained Q-table, and finally run run.py to play the game.