Skip to content

Commit 0641f6b

Browse files
Add Chapter 10: Function Approximation (code, tests, workflow)
1 parent 0ee153b commit 0641f6b

19 files changed

Lines changed: 341 additions & 0 deletions

.github/workflows/ch10.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# CI for Chapter 10: runs the chapter's test-suite whenever the chapter code
# or this workflow file changes.
name: ch10 — Function Approximation

on:
  push:
    paths:
      - 'ch10_function_approx/**'
      - '.github/workflows/ch10.yml'
  pull_request:
    paths:
      - 'ch10_function_approx/**'
      - '.github/workflows/ch10.yml'

# Least privilege: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      # Let every Python version report its own result instead of cancelling
      # the remaining matrix entries on the first failure.
      fail-fast: false
      matrix:
        python-version: ['3.9', '3.10', '3.11']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          # Key the pip cache on this chapter's requirements only.
          cache-dependency-path: ch10_function_approx/requirements.txt

      - name: Install dependencies
        run: |
          python -m pip install -U pip
          python -m pip install -r ch10_function_approx/requirements.txt

      - name: Run Chapter 10 tests
        env:
          PYTHONPATH: .
        run: |
          python -m pytest -q ch10_function_approx/tests
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Chapter 10 — Function Approximation Basics

Usage:

```bash
pip install -r ch10_function_approx/requirements.txt
pytest ch10_function_approx/tests
python -m ch10_function_approx.scripts.run_mc_demo
```

ch10_function_approx/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Chapter 10 — Function Approximation Basics
2+
from .features.tile_coding import TileCoder, ActionBlockTileCoder
3+
from .agents.linear_sarsa import LinearSarsaAgent
4+
from .agents.linear_td0 import LinearTD0
5+
from .envs.mountain_car import MountainCar
6+
from .utils.policies import epsilon_greedy
7+
8+
# Public API of the chapter package: exactly the names imported above.
__all__ = [
    # feature construction
    "TileCoder",
    "ActionBlockTileCoder",
    # learning agents
    "LinearSarsaAgent",
    "LinearTD0",
    # environment
    "MountainCar",
    # policy helper
    "epsilon_greedy",
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# package
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from __future__ import annotations
2+
import numpy as np
3+
from dataclasses import dataclass
4+
from ..utils.policies import epsilon_greedy
5+
6+
@dataclass
class LinearSarsaAgent:
    """Semi-gradient SARSA(0) agent with a linear action-value function.

    The action value is approximated as ``w @ phi_fn(s_vec, a)``. The feature
    function ``phi_fn`` is passed in by the caller on every call; it must
    return a length-``d`` vector for a (state, action) pair.

    Attributes set in ``__post_init__``:
        w:   weight vector, initialised to zeros of length ``d``.
        rng: ``numpy.random.Generator`` seeded with ``seed``.
    """

    d: int  # length of the weight / feature vector
    nA: int  # number of discrete actions (actions are 0 .. nA-1)
    gamma: float = 1.0  # discount factor
    alpha: float = 0.5  # step size
    eps: float = 0.05  # exploration rate handed to epsilon_greedy
    seed: int | None = None  # seed for the agent's own random generator

    def __post_init__(self):
        self.w = np.zeros(self.d, dtype=float)
        self.rng = np.random.default_rng(self.seed)

    def q_row(self, phi_fn, s_vec) -> np.ndarray:
        """Return the vector of action values ``[Q(s, 0), ..., Q(s, nA-1)]``."""
        return np.array(
            [self.w @ phi_fn(s_vec, a) for a in range(self.nA)], dtype=float
        )

    def act(self, phi_fn, s_vec) -> int:
        """Pick an action for ``s_vec`` via ``epsilon_greedy`` on the action values."""
        # epsilon_greedy is defined in ..utils.policies; it receives the
        # action values, the exploration rate and this agent's generator.
        return epsilon_greedy(self.q_row(phi_fn, s_vec), self.eps, self.rng)

    def step(self, phi_fn, s_vec, a, r, ns_vec, na, done: bool = False):
        """Apply one SARSA update for the transition (s, a, r, s', a').

        Args:
            phi_fn: feature function ``phi_fn(state, action) -> vector``.
            s_vec, a: state and action the transition started from.
            r: reward received.
            ns_vec, na: next state and next action; ignored when ``done``.
            done: ``True`` when ``ns_vec`` is terminal. The value of a
                terminal state is zero by definition, so the target is just
                ``r`` and nothing is bootstrapped from ``(ns_vec, na)``.
                Defaults to ``False``, which reproduces the previous
                always-bootstrap behaviour.

        Returns:
            The TD error computed before the weights were changed.
        """
        phi_sa = phi_fn(s_vec, a)
        if done:
            td_target = r
        else:
            td_target = r + self.gamma * (self.w @ phi_fn(ns_vec, na))
        td_err = td_target - (self.w @ phi_sa)
        # For a linear approximator the gradient of Q w.r.t. w is phi(s, a).
        self.w += self.alpha * td_err * phi_sa
        return td_err
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from __future__ import annotations
2+
import numpy as np
3+
from dataclasses import dataclass
4+
5+
@dataclass
class LinearTD0:
    """Semi-gradient TD(0) state-value prediction with linear features.

    The value of a state is approximated as ``w @ phi_s`` where ``phi_s`` is
    the length-``d`` feature vector supplied by the caller.

    Attributes set in ``__post_init__``:
        w:   weight vector, initialised to zeros of length ``d``.
        rng: ``numpy.random.Generator`` seeded with ``seed`` (not used by the
             methods of this class itself).
    """

    d: int  # length of the weight / feature vector
    gamma: float = 0.99  # discount factor
    alpha: float = 0.1  # step size
    seed: int | None = None  # seed for ``rng``

    def __post_init__(self):
        self.w = np.zeros(self.d, dtype=float)
        self.rng = np.random.default_rng(self.seed)

    def predict(self, phi_s: np.ndarray) -> float:
        """Return the current value estimate for feature vector ``phi_s``."""
        return float(self.w @ phi_s)

    def update(
        self, phi_s: np.ndarray, r: float, phi_ns: np.ndarray, done: bool = False
    ):
        """Apply one TD(0) update and return the TD error.

        Args:
            phi_s: features of the state the transition started from.
            r: reward received.
            phi_ns: features of the next state; ignored when ``done``.
            done: ``True`` when the next state is terminal. Its value is then
                taken as zero instead of being bootstrapped from ``phi_ns``.
                Defaults to ``False``, which reproduces the previous
                always-bootstrap behaviour.

        Returns:
            The TD error computed before the weights were changed.
        """
        v_next = 0.0 if done else self.w @ phi_ns
        delta = r + self.gamma * v_next - (self.w @ phi_s)
        # For a linear approximator the gradient of v w.r.t. w is phi_s.
        self.w += self.alpha * delta * phi_s
        return delta
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# package
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from __future__ import annotations
2+
from dataclasses import dataclass
3+
import numpy as np
4+
5+
@dataclass
class MountainCarConfig:
    """Constants that parameterise the MountainCar environment."""

    # Position range; the environment resets the car to x_min when it would
    # move left of it.
    x_min: float = -1.2
    x_max: float = 0.6

    # Velocity range; the velocity is clipped to it on every step.
    v_min: float = -0.07
    v_max: float = 0.07

    # Reaching this position ends the episode.
    goal_x: float = 0.5

    # Discount factor carried with the config.
    gamma: float = 1.0

    # Step count at which an episode is ended regardless of position.
    max_steps: int = 2000
14+
15+
class MountainCar:
    """Mountain-car environment with three discrete actions.

    The state is ``[x, v]`` (position, velocity). Every step costs a reward
    of -1.0 except the step that reaches ``cfg.goal_x``, which yields 0.0.
    An episode ends when the goal is reached or after ``cfg.max_steps`` steps.
    """

    LEFT, NEUTRAL, RIGHT = 0, 1, 2

    # Signed throttle per action; built once instead of on every step.
    _FORCE = {LEFT: -1.0, NEUTRAL: 0.0, RIGHT: +1.0}

    def __init__(self, cfg: MountainCarConfig | None = None):
        """Create the environment and reset it to a start state.

        Args:
            cfg: environment constants. When omitted a fresh
                ``MountainCarConfig()`` is created for this instance, so
                mutating one environment's config cannot leak into others
                (a default evaluated in the signature would be shared).
        """
        self.cfg = MountainCarConfig() if cfg is None else cfg
        self.rng = np.random.default_rng()
        self.reset()

    @property
    def nA(self):
        """Number of discrete actions."""
        return 3

    def reset(self, seed: int | None = None):
        """Start a new episode and return the initial state ``[x, v]``.

        Args:
            seed: when given, the random generator is re-created from it
                before the start position is drawn.
        """
        if seed is not None:
            self.rng = np.random.default_rng(seed)
        self.x = self.rng.uniform(-0.6, -0.4)
        self.v = 0.0
        self.t = 0
        return self.state()

    def step(self, a: int):
        """Advance one time step with action ``a``.

        Returns:
            ``(state, reward, done, info)`` where ``state`` is ``[x, v]``,
            ``done`` is a plain ``bool`` and ``info`` is an empty dict.
        """
        assert 0 <= a < self.nA, f"invalid action: {a!r}"
        cfg = self.cfg

        v = self.v + 0.001 * self._FORCE[a] - 0.0025 * np.cos(3 * self.x)
        v = np.clip(v, cfg.v_min, cfg.v_max)
        x = self.x + v
        if x < cfg.x_min:
            # Hitting the left wall stops the car.
            x = cfg.x_min
            v = 0.0
        elif x > cfg.x_max:
            # Keep the position inside the configured range so that feature
            # coders built on [x_min, x_max] never see an out-of-range input.
            x = cfg.x_max

        self.x, self.v = x, v
        self.t += 1

        reached_goal = bool(self.x >= cfg.goal_x)
        done = reached_goal or self.t >= cfg.max_steps
        reward = 0.0 if reached_goal else -1.0
        return self.state(), reward, done, {}

    def state(self):
        """Return the current state as a fresh float array ``[x, v]``."""
        return np.array([self.x, self.v], dtype=float)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# package
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from __future__ import annotations
2+
import numpy as np
3+
from ..envs.mountain_car import MountainCar, MountainCarConfig
4+
from ..features.tile_coding import TileCoder, ActionBlockTileCoder
5+
from ..agents.linear_sarsa import LinearSarsaAgent
6+
7+
def make_tilecoder(n_tilings=8, bins=(8,8)):
    """Build a ``TileCoder`` over the [-1.2, 0.6] x [-0.07, 0.07] state box.

    One 2-element offset is drawn per tiling from a generator with the fixed
    seed 0, each scaled by 0.999, so repeated calls with the same
    ``n_tilings`` yield the same offsets.
    """
    state_lows = np.array([-1.2, -0.07], dtype=float)
    state_highs = np.array([0.6, 0.07], dtype=float)
    offset_rng = np.random.default_rng(0)
    # Draw order matches the tiling order: one rng.random(2) call per tiling.
    tiling_offsets = [offset_rng.random(2) * 0.999 for _ in range(n_tilings)]
    return TileCoder(
        lows=state_lows,
        highs=state_highs,
        bins_per_dim=bins,
        n_tilings=n_tilings,
        offsets=tiling_offsets,
    )
16+
17+
def run(episodes=50, seed=0, n_tilings=8):
    """Train a linear SARSA agent on MountainCar with tile-coded features.

    Args:
        episodes: number of episodes to run.
        seed: seeds the agent; episode ``k`` resets the environment with
            ``seed + k``.
        n_tilings: number of tilings; the step size is ``0.5 / n_tilings``.

    Returns:
        ``(steps_per_episode, weights)``: an array with the length of each
        episode and the agent's weight vector after the last episode.
    """
    env = MountainCar(MountainCarConfig())
    coder = ActionBlockTileCoder(
        make_tilecoder(n_tilings=n_tilings, bins=(8,8)), n_actions=env.nA
    )
    phi = coder.phi
    agent = LinearSarsaAgent(
        d=coder.d,
        nA=env.nA,
        gamma=1.0,
        alpha=0.5/n_tilings,
        eps=0.05,
        seed=seed,
    )

    episode_lengths = []
    for episode in range(episodes):
        state = env.reset(seed + episode)
        action = agent.act(phi, state)
        n_steps = 0
        finished = False
        while not finished:
            next_state, reward, finished, _ = env.step(action)
            next_action = agent.act(phi, next_state)
            # NOTE(review): this update is also applied on the final
            # transition, so it bootstraps from the state/action pair seen
            # after the episode has ended — confirm that is intended.
            agent.step(phi, state, action, reward, next_state, next_action)
            state, action = next_state, next_action
            n_steps += 1
        episode_lengths.append(n_steps)
    return np.array(episode_lengths), agent.w
37+
38+
if __name__ == "__main__":
    # Short demo: 20 episodes with a fixed seed; only the episode lengths
    # are printed, the learned weights are discarded.
    episode_lengths, _weights = run(episodes=20, seed=123, n_tilings=8)
    print("Steps per episode:", episode_lengths)

0 commit comments

Comments
 (0)