Skip to content

Commit d2687f9

Browse files
Add Monte Carlo Prediction demo for Chapter 5 (two-state MDP example)
1 parent 7b59352 commit d2687f9

File tree

2 files changed

+56
-0
lines changed

2 files changed

+56
-0
lines changed

ch5_monte_carlo/__init__.py

Whitespace-only changes.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# ch5_monte_carlo/examples/mc_prediction_demo.py
2+
3+
import numpy as np
4+
from collections import defaultdict
5+
6+
def generate_episode(p=0.5):
    """Generate one episode of the two-state MDP.

    The process starts in state "A". On every step it stays in "A" with
    probability ``p`` (self-loop) or moves to "B" with probability
    ``1 - p``; either way the step taken from "A" yields reward 0. From "B"
    the episode always terminates with reward +1.

    Args:
        p: Probability of the A -> A self-loop. Must satisfy
            ``0 <= p < 1``; with ``p >= 1`` the process would never leave
            "A" and the episode would never end.

    Returns:
        A list of ``(state, reward)`` tuples in time order, where ``reward``
        is the reward paired with the step taken from ``state``. The last
        entry is always ``("B", 1)``.

    Raises:
        ValueError: If ``p`` is outside ``[0, 1)``.
    """
    if not 0.0 <= p < 1.0:
        raise ValueError(f"p must be in [0, 1), got {p!r}")

    # Each step taken from "A" has reward 0 whether it loops or moves on, so
    # the trajectory is simply one ("A", 0) entry per visit to "A". One random
    # draw is consumed per visit, exactly as a draw-then-branch loop would.
    episode = [("A", 0)]
    while np.random.rand() < p:
        episode.append(("A", 0))

    # B -> Terminal with +1 reward
    episode.append(("B", 1))
    return episode
21+
22+
def mc_prediction(episodes=5000, p=0.5, gamma=0.9, first_visit=True,
                  episode_fn=None):
    """Estimate state values by Monte Carlo prediction.

    Each episode is a time-ordered list of ``(state, reward)`` tuples. The
    return from step ``t`` is ``G_t = r_t + gamma * G_{t+1}``, where ``r_t``
    is the reward stored alongside the state at step ``t``. The value of a
    state is the average of the returns recorded for it.

    Args:
        episodes: Number of episodes to sample.
        p: Self-loop probability forwarded to ``generate_episode``. Ignored
            when ``episode_fn`` is supplied.
        gamma: Discount factor.
        first_visit: If true, only the return following the *first*
            occurrence of a state in each episode is recorded (first-visit
            MC). Otherwise the return following every occurrence is
            recorded (every-visit MC).
        episode_fn: Optional zero-argument callable returning one episode.
            Defaults to sampling the two-state MDP via
            ``generate_episode(p)``.

    Returns:
        A ``defaultdict(float)`` mapping each visited state to its estimated
        value. States that were never visited are absent (and read as 0.0).
    """
    if episode_fn is None:
        sample_episode = lambda: generate_episode(p)  # noqa: E731
    else:
        sample_episode = episode_fn

    returns = defaultdict(list)

    for _ in range(episodes):
        episode = sample_episode()

        # Returns are accumulated back-to-front, so the first state seen in
        # that scan is the LAST visit. Index the first occurrence of every
        # state up front so first-visit MC records the correct time step.
        first_seen = {}
        if first_visit:
            for t, (s, _reward) in enumerate(episode):
                first_seen.setdefault(s, t)

        G = 0.0
        for t in reversed(range(len(episode))):
            s, r = episode[t]
            G = gamma * G + r
            if not first_visit or first_seen[s] == t:
                returns[s].append(G)

    # Average once per state at the end instead of re-averaging the whole
    # list after every appended return.
    V = defaultdict(float)
    for s, state_returns in returns.items():
        V[s] = np.mean(state_returns)
    return V
43+
44+
if __name__ == "__main__":
    np.random.seed(42)
    gamma, p = 0.9, 0.5

    # Pass p and gamma explicitly so the simulation and the closed-form
    # values below are guaranteed to use the same parameters.
    V_fv = mc_prediction(episodes=5000, p=p, gamma=gamma, first_visit=True)
    V_ev = mc_prediction(episodes=5000, p=p, gamma=gamma, first_visit=False)

    # Analytic values, using the same return convention as mc_prediction
    # (the reward paired with a state counts undiscounted toward that
    # state's return):
    #   v(B) = 1
    #   v(A) = 0 + gamma * (p * v(A) + (1 - p) * v(B))
    #        = gamma * (1 - p) * v(B) / (1 - p * gamma)
    vB_true = 1.0
    vA_true = gamma * (1 - p) * vB_true / (1 - p * gamma)

    print("First-visit MC:", dict(V_fv))
    print("Every-visit MC:", dict(V_ev))
    print(f"True values: A={vA_true:.5f}, B={vB_true:.5f}")

0 commit comments

Comments
 (0)