-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathexp_utils.py
More file actions
106 lines (101 loc) · 5.34 KB
/
exp_utils.py
File metadata and controls
106 lines (101 loc) · 5.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import time
import wandb
from torch.utils.tensorboard import SummaryWriter
from distutils.util import strtobool
def add_common_args(parser):
"""Add common arguments used across all experiments."""
parser.add_argument("--exp-name", type=str, default=os.path.basename(__file__).rstrip(".py"),
help="the name of this experiment")
parser.add_argument("--gym-id", type=str, default="ALE/Breakout-v5",
help="the id of the gym environment")
parser.add_argument("--learning-rate", type=float, default=2.5e-4,
help="the learning rate of the optimizer")
parser.add_argument("--seed", type=int, default=1,
help="seed of the experiment")
parser.add_argument("--total-timesteps", type=int, default=10000000,
help="total timesteps of the experiments")
parser.add_argument("--torch-deterministic", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="if toggled, `torch.backends.cudnn.deterministic=False`")
parser.add_argument("--cuda", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="if toggled, cuda will be enabled by default")
parser.add_argument("--track", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
help="if toggled, this experiment will be tracked with Weights and Biases")
parser.add_argument("--wandb-project-name", type=str, default="ppo-mamba",
help="the wandb's project name")
parser.add_argument("--wandb-entity", type=str, default=None,
help="the entity (team) of wandb's project")
parser.add_argument("--capture-video", type=lambda x: bool(strtobool(x)), default=False, nargs="?", const=True,
help="whether to capture videos of the agent performances (check out `videos` folder)")
parser.add_argument("--save-interval", type=int, default=100,
help="Save the model checkpoint every X updates")
parser.add_argument("--save_model", type=lambda x:bool(strtobool(x)), default=False,
help="Whether to save model checkpoints")
# Algorithm specific arguments
parser.add_argument("--num-envs", type=int, default=8,
help="the number of parallel game environments")
parser.add_argument("--num-steps", type=int, default=128,
help="the number of steps to run in each environment per policy rollout")
parser.add_argument("--anneal-lr", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="Toggle learning rate annealing for policy and value networks")
parser.add_argument("--gae", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="Use GAE for advantage computation")
parser.add_argument("--gamma", type=float, default=0.99,
help="the discount factor gamma")
parser.add_argument("--gae-lambda", type=float, default=0.95,
help="the lambda for the general advantage estimation")
parser.add_argument("--num-minibatches", type=int, default=4,
help="the number of mini-batches")
parser.add_argument("--update-epochs", type=int, default=4,
help="the K epochs to update the policy")
parser.add_argument("--norm-adv", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="Toggle advantages normalization")
parser.add_argument("--clip-coef", type=float, default=0.1,
help="the surrogate clipping coefficient")
parser.add_argument("--clip-vloss", type=lambda x: bool(strtobool(x)), default=True, nargs="?", const=True,
help="Toggle whether or not to use a clipped loss for the value function")
parser.add_argument("--ent-coef", type=float, default=0.01,
help="coefficient of the entropy")
parser.add_argument("--vf-coef", type=float, default=0.5,
help="coefficient of the value function")
parser.add_argument("--max-grad-norm", type=float, default=0.5,
help="the maximum norm for the gradient clipping")
parser.add_argument("--target-kl", type=float, default=None,
help="the target KL divergence threshold")
def setup_logging(args):
"""
Sets up Weights & Biases (if args.track is True) and TensorBoard.
Returns (writer, run_name).
"""
run_name = f"{args.gym_id}__{args.exp_name}__{args.seed}__{int(time.time())}"
if args.track:
wandb.init(
project=args.wandb_project_name,
entity=args.wandb_entity,
sync_tensorboard=True,
config=vars(args),
name=run_name,
monitor_gym=True,
save_code=True,
)
writer = SummaryWriter(f"runs/{run_name}")
writer.add_text(
"hyperparameters",
"|param|value|\n|-|-|\n%s" % (
"\n".join([f"|{key}|{value}|" for key, value in vars(args).items()])
),
)
return writer, run_name
def finish_logging(args, writer, run_name, envs):
"""
Cleans up: closes envs, closes writer, and optionally saves videos to W&B.
"""
if args.track and args.capture_video:
wandb.save(f"videos/{run_name}/*.mp4")
wandb.save(f"videos/{run_name}/*.json")
video_path = f"videos/{run_name}"
video_files = [f for f in os.listdir(video_path) if f.endswith(('.mp4', '.gif'))]
for video_file in video_files:
wandb.log({"video": wandb.Video(os.path.join(video_path, video_file), fps=4, format="mp4")})
envs.close()
writer.close()