player.py
import random
#from game import ball_state


class SoccerAgent:
    """Tabular Q-learning agent for a two-player soccer game."""

    def __init__(self, playerNum):
        self.Qvalues = {}
        self.epsilon = 0.01        # exploration rate
        self.alpha = 0.4           # learning rate
        self.episodeRewards = 0.0
        self.discount = 1.0
        # 1 or 2
        self.id = playerNum

    def getLegalActions(self, state):
        # With possession (state[0] is truthy) the agent can act on the ball;
        # otherwise it can only wait.
        if state[0]:
            return ["pass", "shoot", "hold"]
        return ["wait"]

    def getQValue(self, state, action):
        # Lazily initialize Q(s, a) for states not seen before.
        if state not in self.Qvalues:
            self.Qvalues[state] = {}
            actions = ["pass", "shoot", "hold", "wait"]
            # non-terminal check
            if actions:
                # initialize all Q(s, a) for a given s
                for a in actions:
                    self.Qvalues[state][a] = 0.0
            return 0.0
        # exists, return Q(s, a)
        return self.Qvalues[state][action]

    def computeValueFromQValues(self, state):
        actions = self.getLegalActions(state)
        # terminal state
        if not actions:
            return 0.0
        # find max Q(s, a)
        return max(self.getQValue(state, a) for a in actions)

    def computeActionFromQValues(self, state):
        actions = self.getLegalActions(state)
        # terminal state
        if not actions:
            return None
        # find best action, tracked as an (action, q-value) pair
        best = (actions[0], self.getQValue(state, actions[0]))
        for a in actions:
            q = self.getQValue(state, a)
            # update
            if q > best[1]:
                best = (a, q)
            # random tie-breaker
            elif q == best[1]:
                best = random.choice(((a, q), best))
        return best[0]

    def getAction(self, state):
        # Pick an action (epsilon-greedy)
        legalActions = self.getLegalActions(state)
        action = None
        # terminal state
        if not legalActions:
            return None
        # explore or exploit?
        if random.random() < self.epsilon:
            # random exploration
            action = random.choice(legalActions)
        else:
            # follow the current greedy policy
            action = self.computeActionFromQValues(state)
        return action

    def update(self, state, action, nextState, reward):
        # q = max over a' of Q(s', a')
        q = self.computeValueFromQValues(nextState)
        Q = (1 - self.alpha) * self.getQValue(state, action) + \
            self.alpha * (reward + self.discount * q)
        # assign to Q(s, a)
        self.Qvalues[state][action] = Q

    def observeTransition(self, state, action, nextState, delta):
        self.episodeRewards += delta
        self.update(state, action, nextState, delta)

    def reset(self):
        self.Qvalues = {}
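

# --- Hypothetical usage sketch (illustration only, not part of the original file) ---
# Shows how SoccerAgent's epsilon-greedy action selection and Q-update fit together.
# It assumes a state encoded as a hashable tuple whose first element is a
# "has possession" flag, which is all getLegalActions() above inspects; the real
# game module presumably supplies richer states and rewards.
if __name__ == "__main__":
    agent = SoccerAgent(playerNum=1)
    has_ball = (True,)    # possession: pass/shoot/hold are legal
    no_ball = (False,)    # no possession: only wait is legal

    for _ in range(100):
        action = agent.getAction(has_ball)
        # Toy reward model for the sketch: shooting scores, everything else is neutral.
        reward = 1.0 if action == "shoot" else 0.0
        agent.observeTransition(has_ball, action, no_ball, reward)

    # Learned Q-values for the possession state after 100 toy transitions.
    print(agent.Qvalues[has_ball])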