Skip to content

Commit 104f306

Browse files
ch4: add is_terminal() and step() API for MC; keep terminal self-loop reward at 0
1 parent e1cf15f commit 104f306

1 file changed

Lines changed: 9 additions & 4 deletions

File tree

ch4_dynamic_programming/gridworld.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -55,14 +55,19 @@ def _build_PR(self):
55 55
# R already has step_reward by default.
56 56
return P, R
57 57

58-
# -------- environment API (used by ch5 as well) --------
58+
# --- public API used by ch5 (MC) ---
59+
59 60
def is_terminal(self, s):
60-
return tuple(s) == self.goal
61+
"""Return True iff state s is the goal (works with tuple or index)."""
62+
if isinstance(s, tuple):
63+
return s == self.goal
64+
# s given as index
65+
return self.i2s[int(s)] == self.goal
61 66

62 67
def step(self, s, a):
63-
"""Given state (tuple or index) and action index -> (next_state_tuple, reward)."""
68+
"""Take action a in state s (tuple or index). Returns (next_state_tuple, reward)."""
64 69
s_idx = self.s2i[s] if isinstance(s, tuple) else int(s)
65 70
probs = self.P[s_idx, a]
66-
sp_idx = int(np.argmax(probs)) # deterministic
71+
sp_idx = int(np.argmax(probs)) # deterministic env
67 72
r = float(self.R[s_idx, a, sp_idx])
68 73
return self.i2s[sp_idx], r

0 commit comments

Comments
 (0)