File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -55,14 +55,19 @@ def _build_PR(self):
5555 # R already has step_reward by default.
5656 return P , R
5757
58- # -------- environment API (used by ch5 as well) --------
58+ # --- public API used by ch5 (MC) ---
59+
def is_terminal(self, s):
    """Return True iff state ``s`` is the goal state.

    ``s`` may be given as:
      * a coordinate tuple, compared directly against ``self.goal``;
      * a state index (anything ``int()`` accepts, e.g. a Python or NumPy
        integer), resolved through ``self.i2s``;
      * any other coordinate sequence (list, array, ...), converted to a
        tuple before the comparison.
    """
    if isinstance(s, tuple):
        return s == self.goal
    try:
        idx = int(s)
    except (TypeError, ValueError):
        # Not index-like: treat it as a coordinate sequence, as the earlier
        # ``tuple(s) == self.goal`` implementation did.
        return tuple(s) == self.goal
    return self.i2s[idx] == self.goal
6166
def step(self, s, a):
    """Take action ``a`` in state ``s`` and return ``(next_state_tuple, reward)``.

    ``s`` may be a coordinate tuple (looked up in ``self.s2i``), a state
    index (anything ``int()`` accepts), or any other coordinate sequence
    (list, array, ...), which is converted to a tuple before the lookup.
    ``a`` is the action index used to address ``self.P`` and ``self.R``.

    The reward is returned as a plain ``float``; the next state is the
    entry of ``self.i2s`` for the chosen successor index.
    """
    if isinstance(s, tuple):
        s_idx = self.s2i[s]
    else:
        try:
            s_idx = int(s)
        except (TypeError, ValueError):
            # Not index-like: treat it as a coordinate sequence.
            s_idx = self.s2i[tuple(s)]

    probs = self.P[s_idx, a]
    # The environment is treated as deterministic, so the successor is the
    # most probable next state rather than a sample from ``probs``.
    sp_idx = int(np.argmax(probs))
    r = float(self.R[s_idx, a, sp_idx])
    return self.i2s[sp_idx], r
You can’t perform that action at this time.
0 commit comments