|
| 1 | +from __future__ import annotations |
| 2 | +import argparse, os |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | +import matplotlib.pyplot as plt |
| 6 | + |
| 7 | +from rldp.gridworld import make_gridworld, unravel_index, ACTIONS, arrows_from_policy |
| 8 | +from rldp.dp import policy_evaluation, policy_iteration, value_iteration |
| 9 | + |
def save_grid_csv(V, n, out_csv):
    """Write state values to ``out_csv`` as an ``n`` x ``n`` CSV grid.

    Each state index ``s`` in ``range(n * n)`` is stored in the ``(i, j)``
    cell returned by ``unravel_index(s, n)``. The CSV is written without the
    row index column.

    Args:
        V: State values, indexable by the integer state index.
        n: Side length of the square grid.
        out_csv: Destination path. Its parent directory is created when it
            does not exist yet.
    """
    grid = np.zeros((n, n))
    for s in range(n * n):
        i, j = unravel_index(s, n)
        grid[i, j] = V[s]

    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when the path
    # actually names one.
    parent = os.path.dirname(out_csv)
    if parent:
        os.makedirs(parent, exist_ok=True)

    pd.DataFrame(grid).to_csv(out_csv, index=False)
| 18 | + |
def save_policy_csv(pi, n, out_csv):
    """Write a policy to ``out_csv`` as an ``n`` x ``n`` CSV grid.

    The policy is converted with ``arrows_from_policy`` and the result is
    reshaped to ``(n, n)``, so the conversion must yield exactly ``n * n``
    entries. The CSV is written without the row index column.

    Args:
        pi: Policy in whatever form ``arrows_from_policy`` accepts.
        n: Side length of the square grid.
        out_csv: Destination path. Its parent directory is created when it
            does not exist yet.
    """
    arrow_grid = arrows_from_policy(pi).reshape(n, n)

    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when the path
    # actually names one.
    parent = os.path.dirname(out_csv)
    if parent:
        os.makedirs(parent, exist_ok=True)

    pd.DataFrame(arrow_grid).to_csv(out_csv, index=False)
| 24 | + |
def plot_values(V, n, out_png, title=None):
    """Render state values as an annotated ``n`` x ``n`` heatmap and save it.

    Each state index ``s`` in ``range(n * n)`` is placed in the ``(i, j)``
    cell returned by ``unravel_index(s, n)``. Every cell is labelled with its
    value formatted to zero decimal places.

    Args:
        V: State values, indexable by the integer state index.
        n: Side length of the square grid.
        out_png: Destination image path. Its parent directory is created when
            it does not exist yet.
        title: Optional figure title; no title is drawn when falsy.
    """
    grid = np.zeros((n, n))
    for s in range(n * n):
        i, j = unravel_index(s, n)
        grid[i, j] = V[s]

    fig = plt.figure()
    try:
        plt.imshow(grid, interpolation='nearest')
        plt.colorbar()
        if title:
            plt.title(title)
        # imshow draws column j along x and row i along y, hence text(j, i).
        for i in range(n):
            for j in range(n):
                plt.text(j, i, f"{grid[i, j]:.0f}", ha='center', va='center')

        # os.path.dirname() returns '' for a bare filename and
        # os.makedirs('') raises FileNotFoundError, so only create a
        # directory when the path actually names one.
        parent = os.path.dirname(out_png)
        if parent:
            os.makedirs(parent, exist_ok=True)
        plt.savefig(out_png, bbox_inches='tight', dpi=160)
    finally:
        # Release the figure even when directory creation or saving fails.
        plt.close(fig)
| 41 | + |
def main():
    """Solve a gridworld with policy and value iteration and save the results.

    Command-line options are ``--env`` (``4x4`` or ``6x6``), ``--gamma``,
    ``--theta`` and ``--outdir``. For each solver the value table is written
    as CSV and PNG and the policy as CSV, all under ``--outdir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', default='4x4', choices=['4x4', '6x6'])
    parser.add_argument('--gamma', type=float, default=1.0)
    parser.add_argument('--theta', type=float, default=1e-6)
    parser.add_argument('--outdir', default='artifacts/ch4_4x4')
    args = parser.parse_args()

    n = 6 if args.env != '4x4' else 4
    states, actions, P, R, _meta = make_gridworld(n=n)

    solver_options = {'gamma': args.gamma, 'theta': args.theta}
    # Policy Iteration
    pi_pi, V_pi = policy_iteration(states, actions, P, R, **solver_options)
    # Value Iteration
    pi_vi, V_vi = value_iteration(states, actions, P, R, **solver_options)

    os.makedirs(args.outdir, exist_ok=True)

    def artifact(filename):
        # Resolve an artifact file name inside the output directory.
        return os.path.join(args.outdir, filename)

    # Final value tables as CSV.
    value_csvs = (
        (V_pi, f'pi_values_{args.env}.csv'),
        (V_vi, f'vi_values_{args.env}.csv'),
    )
    for values, filename in value_csvs:
        save_grid_csv(values, n, artifact(filename))

    # Final value tables as heatmaps.
    value_plots = (
        (V_pi, f'pi_values_{args.env}.png', 'Policy Iteration Values'),
        (V_vi, f'vi_values_{args.env}.png', 'Value Iteration Values'),
    )
    for values, filename, heading in value_plots:
        plot_values(values, n, artifact(filename), heading)

    # Policies as CSV.
    policy_csvs = (
        (pi_pi, f'pi_policy_{args.env}.csv'),
        (pi_vi, f'vi_policy_{args.env}.csv'),
    )
    for policy, filename in policy_csvs:
        save_policy_csv(policy, n, artifact(filename))

    print('Artifacts written to:', args.outdir)
| 70 | + |
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
0 commit comments