-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_pendulum.py
More file actions
106 lines (85 loc) · 3.07 KB
/
run_pendulum.py
File metadata and controls
106 lines (85 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# run_pendulum.py
# Trains a coarse value function for swing-up (DP on a grid),
# then hands off to LQR near upright to balance.
from __future__ import annotations
import math
import numpy as np
import matplotlib.pyplot as plt
from pendulum_env import PendulumEnv, PendulumParams
from dp_tool import value_iteration, greedy_from_grid, wrap_pi
from msdcontrol.lqr import dlqr
def stage_cost(x, u, q_theta=4.0, q_dtheta=0.5, r=0.05):
theta, dtheta = x
phi = wrap_pi(theta - math.pi)
return q_theta*phi*phi + q_dtheta*dtheta*dtheta + r*u*u
def main():
env = PendulumEnv(PendulumParams(u_limit=3.0))
dt = 0.02
TH_POINTS = 49
DTH_POINTS = 37
ACT_POINTS = 11
th_grid = np.linspace(-math.pi, math.pi, TH_POINTS)
dth_grid = np.linspace(-8.0, 8.0, DTH_POINTS)
actions = np.linspace(-env.p.u_limit, env.p.u_limit, ACT_POINTS)
print("Running value iteration")
V, Pi = value_iteration(
step_fn=env.rk4,
stage_cost=lambda x,u: stage_cost(x,u),
th_grid=th_grid,
dth_grid=dth_grid,
actions=actions,
dt=dt,
gamma=0.995,
max_iters=300,
tol=2e-4,
)
print("DP done.")
A, B = env.linearize_upright_ct()
Ad, Bd = env.discretize(A, B, dt)
# Penalties for (phi, dphi) about upright
Q = np.diag([20.0, 3.0])
R = np.array([[0.2]])
K, S = dlqr(Ad, Bd, Q, R) # expects shape (1,2) for K
T = 8.0
steps = int(T/dt)
x = np.array([0.0, 0.0]) # start hanging down
xs = [x.copy()]
us = []
def near_upright(x):
phi = wrap_pi(x[0] - math.pi)
return (abs(phi) < 0.2) and (abs(x[1]) < 1.0)
for k in range(steps):
if near_upright(x):
# switch to your LQR around upright; state = [phi, dphi]
phi = wrap_pi(x[0] - math.pi)
x_lqr = np.array([phi, x[1]])
u = -(K @ x_lqr).item()
else:
# greedy action from DP policy grid
u = greedy_from_grid(x, Pi, th_grid, dth_grid)
# clamp to env limits
u = float(np.clip(u, -env.p.u_limit, env.p.u_limit))
us.append(u)
x = env.rk4(x, u, dt)
xs.append(x.copy())
xs = np.array(xs)
us = np.array(us)
# Save an animation of the swing-up (theta over time) to outputs/pendulum_swing.gif
gif_path = env.save_gif(
thetas=xs[:, 0], # use theta sequence from the sim
path="outputs/pendulum_swing.gif",
dt=dt,
fps=None, # None -> uses round(1/dt)
dpi=140,
trail=True # set False if you prefer no trail
)
print(f"Saved pendulum animation to: {gif_path}")
# t = np.arange(xs.shape[0]) * dt
# fig, ax = plt.subplots(3, 1, figsize=(8, 7), sharex=True)
# ax[0].plot(t, np.unwrap(xs[:,0])) ; ax[0].set_ylabel("theta [rad]")
# ax[1].plot(t, xs[:,1]) ; ax[1].set_ylabel("dtheta [rad/s]")
# ax[2].plot(t[:-1], us) ; ax[2].set_ylabel("u [N·m]") ; ax[2].set_xlabel("time [s]")
# plt.tight_layout()
# plt.show()
if __name__ == "__main__":
main()