Skip to content

Commit 5c6d871

Browse files
committed
update
1 parent 65e9265 commit 5c6d871

File tree

13 files changed: +903 -24 lines changed

13 files changed

+903
-24
lines changed
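(File path header not captured in this extract; presumably a newly added `alac` algorithm config alongside examples/online/config/mujoco/algo/qsm.yaml — confirm against the commit.)

Lines changed: 24 additions & 0 deletions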
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# @package _global_
2+
3+
algo:
4+
name: alac
5+
discount: 0.99
6+
num_samples: 10
7+
ema: 0.005
8+
ld:
9+
resnet: false
10+
activation: relu
11+
ensemble_size: 2
12+
time_dim: 64
13+
hidden_dims: [512, 512]
14+
cond_hidden_dims: [128, 128]
15+
steps: 20
16+
step_size: 0.05
17+
noise_scale: 1.0
18+
noise_schedule: "none"
19+
clip_sampler: true
20+
x_min: -1.0
21+
x_max: 1.0
22+
epsilon: 0.001
23+
lr: 0.0003
24+
clip_grad_norm: null

examples/online/config/mujoco/algo/qsm.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22

33
algo:
44
name: qsm
5-
critic_hidden_dims: [256, 256]
5+
critic_hidden_dims: [512, 512]
66
critic_lr: 0.0003
77
discount: 0.99
88
num_samples: 10
99
ema: 0.005
10-
temp: 0.2
10+
temp: 0.1
1111
diffusion:
1212
time_dim: 64
13-
mlp_hidden_dims: [256, 256]
13+
mlp_hidden_dims: [512, 512]
1414
lr: 0.0003
1515
end_lr: null
1616
lr_decay_steps: null

examples/online/config/mujoco/config.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ random_frames: 5_000
2727
eval_frames: 10_000
2828
log_frames: 1_000
2929
lap_reset_frames: 250
30-
eval_episodes: 10
3130
log:
3231
dir: logs
3332
tag: debug

examples/online/main_mujoco_offpolicy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@
55
import hydra
66
import numpy as np
77
import omegaconf
8-
import wandb
98
from omegaconf import OmegaConf
109
from tqdm import tqdm
1110

11+
import wandb
1212
from flowrl.agent.online import *
13-
from flowrl.agent.online.idem import IDEMAgent
1413
from flowrl.config.online.mujoco import Config
1514
from flowrl.dataset.buffer.state import ReplayBuffer
1615
from flowrl.types import *
@@ -25,6 +24,7 @@
2524
"dpmd": DPMDAgent,
2625
"qsm": QSMAgent,
2726
"idem": IDEMAgent,
27+
"alac": ALACAgent,
2828
}
2929

3030
class OffPolicyTrainer():

flowrl/agent/online/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from ..base import BaseAgent
2+
from .alac.alac import ALACAgent
23
from .ctrl.ctrl import Ctrl_TD3_Agent
34
from .dpmd import DPMDAgent
45
from .idem import IDEMAgent
@@ -18,6 +19,7 @@
1819
"DPMDAgent",
1920
"PPOAgent",
2021
"QSMAgent",
21-
"IDEMAgent"
22+
"IDEMAgent",
23+
"ALACAgent",
2224
"Ctrl_TD3_Agent",
2325
]

0 commit comments

Comments
 (0)