dqn/train.py — 78 changes: 52 additions & 26 deletions

@@ -9,12 +9,19 @@
 from copy import deepcopy
 from minecraft_env import env
 from memory import Memory
+import marlo
+import gym

+def stop_loop(event):
+    event.stop()

 if __name__=="__main__":
-    env = env.MinecraftEnv()
-    env.init(allowDiscreteMovement=None,
-             videoResolution=[800, 600])
+    env = gym.make('MinecraftEating1-v0')
+    env.init(allowDiscreteMovement=["move", "turn"],
+             videoResolution=[800, 600],)
+    # env = env.MinecraftEnv()
+    # env.init(allowDiscreteMovement=None,
+    #          videoResolution=[800, 600])
     env.seed(500)
     torch.manual_seed(500)
     render_map = False
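Setup now goes through the gym/marlo mission registration instead of the custom MinecraftEnv wrapper, with the old path kept as comments. A minimal sketch of the new construction, assuming the gym-minecraft 'MinecraftEating1-v0' registration is installed; make_env is a hypothetical helper, not part of this diff:

import gym

def make_env(mission='MinecraftEating1-v0', seed=500):
    env = gym.make(mission)
    # gym-minecraft missions are configured via init() after make()
    env.init(allowDiscreteMovement=["move", "turn"],
             videoResolution=[800, 600])
    env.seed(seed)
    return env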
@@ -39,10 +39,11 @@
     if render_map:
         root, canvas = init_map()


     steps = 0
     scores = []
     epsilon = 1.0
-    max_score = hp.initial_max_score

     for episode in range(hp.num_episodes):
         state = env.reset()
         state = pre_process(state)
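A plausible reading of the get_action(epsilon, qvalue, num_actions) call used in the loop below — an epsilon-greedy pick, sketched here as an assumption since get_action is defined elsewhere in the repo:

import random

def get_action(epsilon, qvalue, num_actions):
    # explore uniformly with probability epsilon, otherwise act greedily
    if random.random() <= epsilon:
        return random.randrange(num_actions)
    return int(qvalue.argmax(dim=-1).item())  # qvalue: 1 x num_actions tensor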
@@ -58,56 +66,74 @@

         score = 0
         prev_life = 20
+        num_train = 0
         while True:

             env.render(mode='rgb_array')
             steps += 1

-            # mu, std, _ = actor(torch.Tensor(history).unsqueeze(0))
-            # action = get_action(mu, std)[0]
             qvalue = model(to_tensor(history).unsqueeze(0))
             action = get_action(epsilon, qvalue, num_actions)
             next_state, reward, done, info = env.step(action)
+            # print(reward, info)

-            observation = info['observation']
-            if observation is not None:
-                life = observation['entities'][0]['life']
-                if life < prev_life:
-                    reward = reward + (life - prev_life)
+            # observation = info['observation']
+            # if observation is not None:
+            #     life = observation['entities'][0]['life']
+            #     if life < prev_life:
+            #         reward = reward + (life - prev_life)

             next_state = pre_process(next_state)
             next_state = np.reshape(next_state, (84, 84, 1))
-            next_history = np.append(next_state, history[:, :, :3], axis=2)
-            reward *= 0.1
+            reward += 0.1

+            next_history = np.append(next_state, history[:, :, :3],
+                                     axis=2)
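The history update above keeps a rolling stack of the four most recent 84x84 frames on the channel axis; a self-contained sketch of just that operation:

import numpy as np

history = np.zeros((84, 84, 4), dtype=np.float32)      # four stacked frames
frame = np.ones((84, 84, 1), dtype=np.float32)         # newest preprocessed frame
history = np.append(frame, history[:, :, :3], axis=2)  # drop the oldest frame
assert history.shape == (84, 84, 4)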
             if done:
                 mask = 0
-                reward = 0
+                # reward = 0
             else:
                 mask = 1


             memory.push(history, next_history, action, reward, mask)

             score += reward
             history = deepcopy(next_history)

             if steps > hp.initial_exploration:
-                epsilon -= 0.00001
-                batch = memory.sample()
-                train_model(model, target_model, batch, optimizer)
+                num_train += 1
+                # epsilon -= 0.00001
+                # batch = memory.sample()


+                # print("----loop start----")
+                # loop = asyncio.new_event_loop()
+                # asyncio.set_event_loop(loop)
+                # task = loop.create_task(train_model(model, target_model, batch, optimizer))
+                # task.add_done_callback(stop_loop(loop))
+                #
+                # loop.run_forever()
+                #
+                # print("loop end")
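The commented-out asyncio attempt would fail as written: train_model is an ordinary function, so loop.create_task(train_model(...)) raises a TypeError (create_task needs a coroutine), and add_done_callback is handed the return value of stop_loop(loop), which is None, instead of a callable. If the intent is to keep stepping the real-time mission while a blocking train step runs, the usual pattern is an executor — a sketch, not part of this diff:

import asyncio

async def train_async(train_fn, *args):
    # run the blocking gradient step in a worker thread
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, train_fn, *args)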

             if steps % hp.update_target:
                 update_target_model(model, target_model)
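Note that steps % hp.update_target is truthy on every step except exact multiples of hp.update_target, so as written this syncs the target network almost every step and skips the scheduled ones; a periodic sync is normally spelled:

if steps % hp.update_target == 0:
    update_target_model(model, target_model)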

             if done:
-                print('episode: ', episode, 'steps: ', steps, 'epsilon: ', round(epsilon, 4),
-                      ' score: ', score)
+                for i in range(num_train):
+                    epsilon -= 0.00001
+                    batch = memory.sample()
+                    train_model(model, target_model, batch, optimizer)
+                print('episode: ', episode, 'steps: ', steps, 'epsilon: ', round(epsilon, 4),
+                      ' score: ', score, 'num_trained: ',num_train)
                 # score_history.append(score)
                 break
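Deferring the num_train updates to episode end keeps env.step latency low during the live mission while preserving the old one-update-per-step budget and epsilon schedule. train_model is defined elsewhere in the repo; for context, a generic one-step DQN update consistent with the (history, next_history, action, reward, mask) transitions pushed above — the batch layout, the gamma value, and the smooth-L1 loss are assumptions, not read from this diff:

import torch
import torch.nn.functional as F

def dqn_update(model, target_model, batch, optimizer, gamma=0.99):
    histories, next_histories, actions, rewards, masks = batch  # assumed tensors
    q = model(histories).gather(1, actions.long().unsqueeze(1)).squeeze(1)
    with torch.no_grad():
        next_q = target_model(next_histories).max(dim=1)[0]
    target = rewards + masks * gamma * next_q  # mask zeroes terminal bootstraps
    loss = F.smooth_l1_loss(q, target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()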


-        if episode % hp.save_freq:
-            score = int(score)
-            directory = 'save_model/'
-            if not os.path.exists(directory):
-                os.makedirs(directory)
+        save_directory = 'save_model/'
+        if episode % hp.save_freq:
+            score = int(score)
+            directory = 'save_model/'
+            if not os.path.exists(save_directory):
+                os.makedirs(save_directory)
             torch.save(model.state_dict(), 'save_model/' + str(score) +
                        'model.pt')
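A few things to flag in the save block: episode % hp.save_freq is truthy for every episode that is not a multiple of hp.save_freq (the same inversion as the target-network sync above); the new save_directory is created but the hardcoded 'save_model/' string is still used in torch.save; and the directory variable is now dead. A sketch of the presumably intended logic:

import os

if episode % hp.save_freq == 0:
    os.makedirs(save_directory, exist_ok=True)
    torch.save(model.state_dict(),
               os.path.join(save_directory, str(int(score)) + 'model.pt'))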