Add Chapter 10: Function Approximation (code, tests, workflow)

srikanthbaride · srikanthbaride · commit 0641f6b1fbd0 · 2025-09-12T10:23:30.000-05:00
diff --git a/.github/workflows/ch10.yml b/.github/workflows/ch10.yml
@@ -0,0 +1,38 @@
+# CI for Chapter 10: installs the chapter's requirements and runs its pytest suite.
+name: ch10 — Function Approximation
+
+# Run only when the chapter's files or this workflow file itself change.
+on:
+  push:
+    paths:
+      - 'ch10_function_approx/**'
+      - '.github/workflows/ch10.yml'
+  pull_request:
+    paths:
+      - 'ch10_function_approx/**'
+      - '.github/workflows/ch10.yml'
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      # One job per listed interpreter version.
+      matrix:
+        python-version: ['3.9', '3.10', '3.11']
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          # Ask setup-python to cache pip downloads between runs.
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -U pip
+          pip install -r ch10_function_approx/requirements.txt
+
+      - name: Run Chapter 10 tests
+        env:
+          # Put the repo root on the import path so the tests can import
+          # `ch10_function_approx` as a package.
+          PYTHONPATH: .
+        run: |
+          pytest -q ch10_function_approx/tests
diff --git a/ch10_function_approx/README_ch10.md b/ch10_function_approx/README_ch10.md
@@ -0,0 +1,8 @@
+# Chapter 10 — Function Approximation Basics
+
+Usage (run from the repository root):
+```bash
+pip install -r ch10_function_approx/requirements.txt
+pytest ch10_function_approx/tests
+python -m ch10_function_approx.scripts.run_mc_demo
+```
diff --git a/ch10_function_approx/__init__.py b/ch10_function_approx/__init__.py
@@ -0,0 +1,15 @@
+# Chapter 10 — Function Approximation Basics
+from .features.tile_coding import TileCoder, ActionBlockTileCoder
+from .agents.linear_sarsa import LinearSarsaAgent
+from .agents.linear_td0 import LinearTD0
+from .envs.mountain_car import MountainCar
+from .utils.policies import epsilon_greedy
+
+# Public names of the package: exactly the classes and helpers imported above.
+# This list is what `from ch10_function_approx import *` exposes.
+__all__ = [
+    "TileCoder",
+    "ActionBlockTileCoder",
+    "LinearSarsaAgent",
+    "LinearTD0",
+    "MountainCar",
+    "epsilon_greedy",
+]
diff --git a/ch10_function_approx/agents/__init__.py b/ch10_function_approx/agents/__init__.py
@@ -0,0 +1 @@
+# package
diff --git a/ch10_function_approx/agents/linear_sarsa.py b/ch10_function_approx/agents/linear_sarsa.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+import numpy as np
+from dataclasses import dataclass
+from ..utils.policies import epsilon_greedy
+
+@dataclass
+class LinearSarsaAgent:
+    """One-step SARSA with a linear action-value function.
+
+    Action values are computed as ``q(s, a) = w @ phi_fn(s, a)``. The feature
+    function ``phi_fn`` is passed in on every call and must return a vector
+    of length ``d``.
+
+    Attributes:
+        d: Length of the weight vector (and of every feature vector).
+        nA: Number of discrete actions, indexed ``0 .. nA - 1``.
+        gamma: Discount factor used in the TD target.
+        alpha: Step size of the weight update.
+        eps: Exploration probability used by :meth:`act`.
+        seed: Seed for the agent's own random generator (``None`` = unseeded).
+    """
+    d: int
+    nA: int
+    gamma: float = 1.0
+    alpha: float = 0.5
+    eps: float = 0.05
+    seed: int | None = None
+
+    def __post_init__(self):
+        # Weights start at zero, so every action value is initially 0.
+        self.w = np.zeros(self.d, dtype=float)
+        self.rng = np.random.default_rng(self.seed)
+
+    def q_row(self, phi_fn, s_vec) -> np.ndarray:
+        """Return the vector of action values ``[q(s, 0), ..., q(s, nA - 1)]``."""
+        return np.array(
+            [self.w @ phi_fn(s_vec, a) for a in range(self.nA)],
+            dtype=float,
+        )
+
+    def act(self, phi_fn, s_vec) -> int:
+        """Pick an action epsilon-greedily with respect to the current values."""
+        q = self.q_row(phi_fn, s_vec)
+        return epsilon_greedy(q, self.eps, self.rng)
+
+    def step(self, phi_fn, s_vec, a, r, ns_vec, na, done: bool = False):
+        """Apply one SARSA update for the transition ``(s, a, r, ns, na)``.
+
+        Args:
+            phi_fn: Feature function ``phi_fn(state, action) -> ndarray``.
+            s_vec: State in which ``a`` was taken.
+            a: Action taken.
+            r: Reward received.
+            ns_vec: Next state (ignored when ``done`` is true).
+            na: Next action (ignored when ``done`` is true).
+            done: True when ``ns_vec`` is terminal. The value of a terminal
+                state is zero by definition, so the target is just ``r``.
+                Defaults to False, which reproduces the always-bootstrapping
+                behaviour for existing callers.
+
+        Returns:
+            The TD error computed before the weights were changed.
+        """
+        phi_sa = phi_fn(s_vec, a)
+        if done:
+            # Never bootstrap past the end of an episode.
+            td_target = r
+        else:
+            td_target = r + self.gamma * (self.w @ phi_fn(ns_vec, na))
+        td_err = td_target - (self.w @ phi_sa)
+        self.w += self.alpha * td_err * phi_sa
+        return td_err
diff --git a/ch10_function_approx/agents/linear_td0.py b/ch10_function_approx/agents/linear_td0.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+import numpy as np
+from dataclasses import dataclass
+
+@dataclass
+class LinearTD0:
+    """TD(0) state-value prediction with a linear approximator ``v(s) = w @ phi(s)``.
+
+    Callers pass feature vectors of length ``d`` directly; the class never
+    sees raw states.
+    """
+    d: int
+    gamma: float = 0.99
+    alpha: float = 0.1
+    seed: int | None = None
+
+    def __post_init__(self):
+        # The generator is created for callers; no method of this class draws from it.
+        self.rng = np.random.default_rng(self.seed)
+        self.w = np.zeros(self.d, dtype=float)
+
+    def predict(self, phi_s: np.ndarray) -> float:
+        """Return the current value estimate for the features ``phi_s``."""
+        estimate = self.w @ phi_s
+        return float(estimate)
+
+    def update(self, phi_s: np.ndarray, r: float, phi_ns: np.ndarray):
+        """Apply one TD(0) update and return the TD error.
+
+        The target always bootstraps from ``phi_ns``; passing an all-zero
+        ``phi_ns`` makes the bootstrap term vanish.
+        """
+        v_next = self.w @ phi_ns
+        v_here = self.w @ phi_s
+        delta = r + self.gamma * v_next - v_here
+        step_scale = self.alpha * delta
+        self.w += step_scale * phi_s
+        return delta
diff --git a/ch10_function_approx/envs/__init__.py b/ch10_function_approx/envs/__init__.py
@@ -0,0 +1 @@
+# package
diff --git a/ch10_function_approx/envs/mountain_car.py b/ch10_function_approx/envs/mountain_car.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+
+@dataclass
+class MountainCarConfig:
+    """Constants that parameterise the :class:`MountainCar` environment."""
+    # Left wall: a position below x_min is reset to x_min and the velocity is zeroed.
+    x_min: float = -1.2
+    # Right position bound. NOTE(review): not read by MountainCar in this file.
+    x_max: float = 0.6
+    # Velocity is clipped to [v_min, v_max] on every step.
+    v_min: float = -0.07
+    v_max: float = 0.07
+    # Reaching x >= goal_x ends the episode with reward 0.
+    goal_x: float = 0.5
+    # NOTE(review): not read by MountainCar in this file; presumably the
+    # discount agents are expected to use — confirm.
+    gamma: float = 1.0
+    # An episode is also ended once this many steps have been taken.
+    max_steps: int = 2000
+
+class MountainCar:
+    """Mountain-car environment with three discrete actions.
+
+    The state is ``[x, v]`` (position, velocity). Every step costs ``-1``
+    except the step that reaches ``cfg.goal_x``, which yields ``0``.
+    """
+    LEFT, NEUTRAL, RIGHT = 0, 1, 2
+
+    def __init__(self, cfg: MountainCarConfig | None = None):
+        """Create the environment and reset it once.
+
+        Args:
+            cfg: Environment constants. When omitted, a fresh
+                ``MountainCarConfig()`` is built for this instance. A default
+                evaluated in the signature would be a single object shared
+                (and mutable) across every default-constructed environment.
+        """
+        self.cfg = MountainCarConfig() if cfg is None else cfg
+        self.rng = np.random.default_rng()
+        self.reset()
+
+    @property
+    def nA(self):
+        """Number of discrete actions."""
+        return 3
+
+    def reset(self, seed: int | None = None):
+        """Start a new episode and return the initial state ``[x, v]``.
+
+        Args:
+            seed: When given, the environment's generator is re-created from
+                it before the start position is drawn.
+        """
+        if seed is not None:
+            self.rng = np.random.default_rng(seed)
+        # Start at rest at a random position in [-0.6, -0.4).
+        self.x = self.rng.uniform(-0.6, -0.4)
+        self.v = 0.0
+        self.t = 0
+        return np.array([self.x, self.v], dtype=float)
+
+    def step(self, a: int):
+        """Advance one time step.
+
+        Args:
+            a: One of ``LEFT``, ``NEUTRAL``, ``RIGHT``.
+
+        Returns:
+            ``(state, reward, done, info)``. ``done`` is true when the goal
+            is reached or ``cfg.max_steps`` steps have elapsed; ``info`` is
+            always an empty dict.
+        """
+        assert 0 <= a < self.nA
+        force = {self.LEFT: -1.0, self.NEUTRAL: 0.0, self.RIGHT: +1.0}[a]
+        # Engine push plus the slope-dependent gravity term.
+        v = self.v + 0.001 * force - 0.0025 * np.cos(3 * self.x)
+        v = np.clip(v, self.cfg.v_min, self.cfg.v_max)
+        x = self.x + v
+        if x < self.cfg.x_min:
+            # Hitting the left wall stops the car.
+            x = self.cfg.x_min
+            v = 0.0
+        self.x, self.v = x, v
+        self.t += 1
+        at_goal = self.x >= self.cfg.goal_x
+        done = at_goal or (self.t >= self.cfg.max_steps)
+        reward = 0.0 if at_goal else -1.0
+        return np.array([self.x, self.v], dtype=float), reward, done, {}
+
+    def state(self):
+        """Return the current state ``[x, v]`` as a new array."""
+        return np.array([self.x, self.v], dtype=float)
diff --git a/ch10_function_approx/examples/__init__.py b/ch10_function_approx/examples/__init__.py
@@ -0,0 +1 @@
+# package
diff --git a/ch10_function_approx/examples/mountain_car_linear.py b/ch10_function_approx/examples/mountain_car_linear.py
@@ -0,0 +1,40 @@
+from __future__ import annotations
+import numpy as np
+from ..envs.mountain_car import MountainCar, MountainCarConfig
+from ..features.tile_coding import TileCoder, ActionBlockTileCoder
+from ..agents.linear_sarsa import LinearSarsaAgent
+
+def make_tilecoder(n_tilings=8, bins=(8,8)):
+    """Build a :class:`TileCoder` over the (position, velocity) box.
+
+    The box is ``[-1.2, 0.6] x [-0.07, 0.07]``. Each tiling gets its own
+    2-D offset drawn from a generator seeded with 0, so the layout is the
+    same on every call.
+    """
+    offset_rng = np.random.default_rng(0)
+    # One draw of two uniforms per tiling, scaled to stay strictly below 1.
+    shifts = [offset_rng.random(2) * 0.999 for _ in range(n_tilings)]
+    return TileCoder(
+        lows=np.array([-1.2, -0.07], dtype=float),
+        highs=np.array([0.6, 0.07], dtype=float),
+        bins_per_dim=bins,
+        n_tilings=n_tilings,
+        offsets=shifts,
+    )
+
+def run(episodes=50, seed=0, n_tilings=8):
+    """Train linear SARSA with tile coding on MountainCar.
+
+    Args:
+        episodes: Number of training episodes.
+        seed: Seeds the agent; episode ``ep`` resets the environment with
+            ``seed + ep``.
+        n_tilings: Number of tilings; the step size is ``0.5 / n_tilings``.
+
+    Returns:
+        ``(steps_per_episode, weights)``: an integer array with one entry
+        per episode and the agent's final weight vector.
+    """
+    env = MountainCar(MountainCarConfig())
+    tc = make_tilecoder(n_tilings=n_tilings, bins=(8,8))
+    atc = ActionBlockTileCoder(tc, n_actions=env.nA)
+    agent = LinearSarsaAgent(d=atc.d, nA=env.nA, gamma=1.0, alpha=0.5/n_tilings, eps=0.05, seed=seed)
+
+    steps_per_ep = []
+    for ep in range(episodes):
+        s = env.reset(seed + ep)
+        a = agent.act(atc.phi, s)
+        steps = 0
+        while True:
+            ns, r, done, _ = env.step(a)
+            steps += 1
+            if done and ns[0] >= env.cfg.goal_x:
+                # The goal state is terminal and has value zero, so the
+                # target is the reward alone. agent.step(...) as called
+                # below always bootstraps from q(ns, na), so the final
+                # update is applied here directly, and no action is
+                # sampled for a state the agent never acts in.
+                phi_sa = atc.phi(s, a)
+                agent.w += agent.alpha * (r - agent.w @ phi_sa) * phi_sa
+                break
+            na = agent.act(atc.phi, ns)
+            agent.step(atc.phi, s, a, r, ns, na)
+            if done:
+                # Step limit reached: the episode was cut off, not finished,
+                # so the bootstrapped update above is still the right one.
+                break
+            s, a = ns, na
+        steps_per_ep.append(steps)
+    return np.array(steps_per_ep), agent.w
+
+# Demo entry point. This module uses relative imports, so it has to be run
+# as a module: python -m ch10_function_approx.examples.mountain_car_linear
+if __name__ == "__main__":
+    steps, w = run(episodes=20, seed=123, n_tilings=8)
+    print("Steps per episode:", steps)
diff --git a/ch10_function_approx/features/__init__.py b/ch10_function_approx/features/__init__.py
@@ -0,0 +1 @@
+# package
diff --git a/ch10_function_approx/features/tile_coding.py b/ch10_function_approx/features/tile_coding.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+import numpy as np
+from dataclasses import dataclass
+from typing import Iterable, Tuple, List
+
+@dataclass
+class TileCoder:
+    """Tile coding over an axis-aligned box with several shifted grids.
+
+    Each of the ``n_tilings`` grids splits ``[lows, highs]`` into
+    ``bins_per_dim`` cells per dimension. ``offsets[t]`` shifts grid ``t``,
+    measured in units of one bin width per dimension. A point activates
+    exactly one tile per tiling; indices that fall outside a grid are
+    clipped to its edge cells.
+    """
+    lows: np.ndarray
+    highs: np.ndarray
+    bins_per_dim: Tuple[int, ...]
+    n_tilings: int
+    offsets: List[np.ndarray]
+
+    def __post_init__(self):
+        self.lows = np.asarray(self.lows, dtype=float)
+        self.highs = np.asarray(self.highs, dtype=float)
+        assert self.lows.shape == self.highs.shape
+        # Number of input dimensions.
+        self.d = self.lows.size
+        assert len(self.bins_per_dim) == self.d
+        assert len(self.offsets) == self.n_tilings
+        self.bins_per_dim = tuple(map(int, self.bins_per_dim))
+        self.tiles_per_tiling = int(np.prod(self.bins_per_dim))
+        self.total_tiles = self.n_tilings * self.tiles_per_tiling
+        span = self.highs - self.lows
+        self.bin_sizes = span / np.array(self.bins_per_dim, dtype=float)
+
+    def _coord_to_index_single(self, x: np.ndarray, tiling_idx: int) -> int:
+        """Return the row-major index of the cell of one tiling that contains ``x``."""
+        shift = self.offsets[tiling_idx] * self.bin_sizes
+        origin = self.lows - shift
+        cell = np.floor((x - origin) / self.bin_sizes).astype(int)
+        last_cell = np.array(self.bins_per_dim) - 1
+        cell = np.clip(cell, 0, last_cell)
+        # Fold the per-dimension cell indices into one row-major index.
+        flat = 0
+        for n_bins, idx in zip(self.bins_per_dim, cell):
+            flat = flat * n_bins + idx
+        return flat
+
+    def active_indices(self, x: Iterable[float]) -> List[int]:
+        """Return one global tile index per tiling for the point ``x``."""
+        point = np.asarray(x, dtype=float)
+        return [
+            t * self.tiles_per_tiling + int(self._coord_to_index_single(point, t))
+            for t in range(self.n_tilings)
+        ]
+
+    def featurize(self, x: Iterable[float]) -> np.ndarray:
+        """Return the binary feature vector of length ``total_tiles`` for ``x``."""
+        features = np.zeros(self.total_tiles, dtype=float)
+        features[self.active_indices(x)] = 1.0
+        return features
+
+class ActionBlockTileCoder:
+    """State-action features built by giving each action its own block of tiles.
+
+    The feature vector has ``nA`` consecutive blocks of ``tc.total_tiles``
+    entries. For action ``a``, only block ``a`` is non-zero, and it holds
+    the state's tile-coded features.
+    """
+
+    def __init__(self, tilecoder: TileCoder, n_actions: int):
+        self.tc = tilecoder
+        self.nA = int(n_actions)
+        # Total feature length: one full copy of the tiling per action.
+        self.d = self.tc.total_tiles * self.nA
+
+    def phi(self, x, a: int) -> np.ndarray:
+        """Return the binary feature vector for state ``x`` and action ``a``.
+
+        Raises:
+            IndexError: If ``a`` is not in ``0 .. nA - 1``. A negative action
+                would otherwise produce a negative offset and silently set
+                entries in a different action's block.
+        """
+        if not 0 <= a < self.nA:
+            raise IndexError(f"action {a} is out of range for {self.nA} actions")
+        v = np.zeros(self.d, dtype=float)
+        block_start = a * self.tc.total_tiles
+        for i in self.tc.active_indices(x):
+            v[block_start + i] = 1.0
+        return v
+
+    def active_count(self) -> int:
+        """Return the number of active features per vector (one per tiling)."""
+        return self.tc.n_tilings
diff --git a/ch10_function_approx/requirements.txt b/ch10_function_approx/requirements.txt
@@ -0,0 +1,3 @@
+numpy>=1.21
+matplotlib>=3.4
+pytest>=7.0
diff --git a/ch10_function_approx/scripts/__init__.py b/ch10_function_approx/scripts/__init__.py
@@ -0,0 +1 @@
+# package
diff --git a/ch10_function_approx/scripts/run_mc_demo.py b/ch10_function_approx/scripts/run_mc_demo.py
@@ -0,0 +1,4 @@
+from ch10_function_approx.examples.mountain_car_linear import run
+# Command-line demo: call run() for 30 episodes and print the episode lengths.
+if __name__ == "__main__":
+    steps, w = run(episodes=30, seed=42, n_tilings=8)
+    print("Steps per episode:", steps.tolist())
diff --git a/ch10_function_approx/tests/__init__.py b/ch10_function_approx/tests/__init__.py
@@ -0,0 +1 @@
+# package
diff --git a/ch10_function_approx/tests/test_ch10_fa.py b/ch10_function_approx/tests/test_ch10_fa.py
@@ -0,0 +1,47 @@
+import numpy as np
+from ch10_function_approx.features.tile_coding import TileCoder, ActionBlockTileCoder
+from ch10_function_approx.envs.mountain_car import MountainCar, MountainCarConfig
+from ch10_function_approx.examples.mountain_car_linear import run as mc_run
+
+def test_tilecoder_active_count_and_bounds():
+    """A point activates one in-range tile per tiling, and featurize agrees."""
+    n_tilings = 8
+    offsets = [
+        np.array([i/n_tilings, (n_tilings-i-1)/n_tilings])
+        for i in range(n_tilings)
+    ]
+    tc = TileCoder(
+        np.array([-1.0, -2.0]),
+        np.array([1.0, 2.0]),
+        (4, 5),
+        n_tilings,
+        offsets,
+    )
+    origin = np.array([0.0, 0.0])
+
+    active = tc.active_indices(origin)
+    assert len(active) == n_tilings
+    assert all(0 <= i < tc.total_tiles for i in active)
+
+    features = tc.featurize(origin)
+    assert np.isclose(features.sum(), n_tilings)
+
+def test_action_block_shape_and_sparsity():
+    """Action-block features have one slot per (action, tile) and n_tilings ones."""
+    n_tilings = 4
+    offsets = [np.array([0.25*i, 0.25*(3-i)]) for i in range(n_tilings)]
+    tc = TileCoder(
+        np.array([-1.0, -1.0]),
+        np.array([1.0, 1.0]),
+        (4,4),
+        n_tilings,
+        offsets,
+    )
+    n_actions = MountainCar().nA
+    ab = ActionBlockTileCoder(tc, n_actions=n_actions)
+
+    phi = ab.phi([0.1, -0.2], a=2)
+    assert phi.shape[0] == tc.total_tiles * n_actions
+    assert np.isclose(phi.sum(), n_tilings)
+
+def test_mountain_car_dynamics_and_goal():
+    """Check per-step invariants, the step limit, and that the goal is reachable."""
+    env = MountainCar(MountainCarConfig())
+    cfg = env.cfg
+
+    # Part 1: hold RIGHT. Every observation must respect the bounds the
+    # environment enforces, and the episode must end within the step limit.
+    _ = env.reset(seed=0)
+    done = False
+    steps = 0
+    while not done and steps < 5000:
+        obs, reward, done, _ = env.step(env.RIGHT)
+        steps += 1
+        assert obs[0] >= cfg.x_min
+        assert cfg.v_min <= obs[1] <= cfg.v_max
+        assert reward in (-1.0, 0.0)
+    assert done
+    assert steps <= env.cfg.max_steps
+
+    # Part 2: push in the direction of travel so the car builds momentum.
+    # This must end at the goal, with reward 0, before the step limit.
+    obs = env.reset(seed=0)
+    done = False
+    steps = 0
+    while not done:
+        action = env.RIGHT if obs[1] >= 0.0 else env.LEFT
+        obs, reward, done, _ = env.step(action)
+        steps += 1
+    assert obs[0] >= cfg.goal_x
+    assert reward == 0.0
+    assert steps < cfg.max_steps
+
+def test_linear_sarsa_runs_and_improves_steps():
+    """Smoke test: a short training run returns sane shapes and does not get much worse."""
+    n_episodes = 10
+    episode_lengths, weights = mc_run(episodes=n_episodes, seed=0, n_tilings=4)
+    assert episode_lengths.shape[0] == n_episodes
+    assert weights.ndim == 1
+    # Loose check: the median of the last five episodes may exceed the
+    # first episode by at most 200 steps.
+    late_median = np.median(episode_lengths[-5:])
+    assert late_median <= episode_lengths[0] + 200
diff --git a/ch10_function_approx/utils/__init__.py b/ch10_function_approx/utils/__init__.py
@@ -0,0 +1 @@
+# package
diff --git a/ch10_function_approx/utils/policies.py b/ch10_function_approx/utils/policies.py
@@ -0,0 +1,7 @@
+from __future__ import annotations
+import numpy as np
+
+def epsilon_greedy(Q_row: np.ndarray, eps: float, rng: np.random.Generator) -> int:
+    """Return an action index chosen epsilon-greedily from ``Q_row``.
+
+    One uniform draw decides whether to explore. When it is below ``eps``,
+    a second draw picks an index uniformly from ``range(Q_row.size)``.
+    Otherwise the index of the largest entry is returned.
+    """
+    explore = rng.random() < eps
+    choice = rng.integers(Q_row.size) if explore else np.argmax(Q_row)
+    return int(choice)