|
| 1 | +from __future__ import annotations |
| 2 | +import numpy as np |
| 3 | +from ..envs.mountain_car import MountainCar, MountainCarConfig |
| 4 | +from ..features.tile_coding import TileCoder, ActionBlockTileCoder |
| 5 | +from ..agents.linear_sarsa import LinearSarsaAgent |
| 6 | + |
def make_tilecoder(n_tilings=8, bins=(8, 8), seed=0):
    """Build a ``TileCoder`` over a fixed two-dimensional state box.

    The box spans ``[-1.2, 0.6]`` in the first dimension and
    ``[-0.07, 0.07]`` in the second (presumably Mountain Car position and
    velocity -- confirm against the environment).

    Parameters
    ----------
    n_tilings : int
        Number of tilings; one random offset vector is drawn per tiling.
    bins : tuple of int
        Passed through unchanged as ``bins_per_dim``.
    seed : int
        Seed for the generator that draws the tiling offsets. Defaults to
        0, the value that used to be hard-coded, so existing callers get
        the same offsets as before.

    Returns
    -------
    TileCoder
        Coder constructed from the bounds, bins and offsets above.
    """
    lows = np.array([-1.2, -0.07], dtype=float)
    highs = np.array([0.6, 0.07], dtype=float)
    rng = np.random.default_rng(seed)
    # One offset per tiling, drawn sequentially from the same generator;
    # scaling by 0.999 keeps every component strictly below 1.
    offsets = [rng.random(lows.size) * 0.999 for _ in range(n_tilings)]
    return TileCoder(
        lows=lows,
        highs=highs,
        bins_per_dim=bins,
        n_tilings=n_tilings,
        offsets=offsets,
    )
| 16 | + |
def run(episodes=50, seed=0, n_tilings=8):
    """Train a linear SARSA agent on Mountain Car and log episode lengths.

    Parameters
    ----------
    episodes : int
        Number of episodes to run.
    seed : int
        Seed given to the agent; episode ``k`` resets the environment
        with ``seed + k``.
    n_tilings : int
        Number of tilings for the tile coder; the step size is set to
        ``0.5 / n_tilings``.

    Returns
    -------
    tuple
        ``(steps, w)`` where ``steps`` is an array holding the number of
        environment steps taken in each episode and ``w`` is ``agent.w``
        after the last episode.
    """
    env = MountainCar(MountainCarConfig())
    coder = ActionBlockTileCoder(
        make_tilecoder(n_tilings=n_tilings, bins=(8, 8)),
        n_actions=env.nA,
    )
    agent = LinearSarsaAgent(
        d=coder.d,
        nA=env.nA,
        gamma=1.0,
        alpha=0.5 / n_tilings,
        eps=0.05,
        seed=seed,
    )

    episode_lengths = []
    for episode in range(episodes):
        state = env.reset(seed + episode)
        action = agent.act(coder.phi, state)
        n_steps = 0
        finished = False
        while not finished:
            next_state, reward, finished, _ = env.step(action)
            # The next action is chosen before the update (on-policy), and
            # this also happens on the final transition of an episode.
            # NOTE(review): no done flag is passed to agent.step, so whether
            # the terminal update bootstraps from the next state depends on
            # LinearSarsaAgent.step -- confirm.
            next_action = agent.act(coder.phi, next_state)
            agent.step(coder.phi, state, action, reward, next_state, next_action)
            state, action = next_state, next_action
            n_steps += 1
        episode_lengths.append(n_steps)
    return np.array(episode_lengths), agent.w
| 37 | + |
if __name__ == "__main__":
    # Short demo run; the returned weight vector is not used here.
    episode_steps, _weights = run(episodes=20, seed=123, n_tilings=8)
    print("Steps per episode:", episode_steps)
0 commit comments