Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions examples/online/config/dmc/algo/aca.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# @package _global_

# Settings for the `aca` algorithm in the dmc example configs.
algo:
name: aca
target_update_freq: 1
feature_dim: 512
rff_dim: 1024
critic_hidden_dims: [512, 512]
reward_hidden_dims: [512, 512]
phi_hidden_dims: [512, 512]
mu_hidden_dims: [512, 512]
ctrl_coef: 1.0
reward_coef: 1.0
critic_coef: 1.0
critic_activation: elu # not used
back_critic_grad: false
feature_lr: 0.0001
critic_lr: 0.0003
discount: 0.99
num_samples: 10
ema: 0.005
feature_ema: 0.005
clip_grad_norm: null
temp: 0.1
diffusion:
time_dim: 64
mlp_hidden_dims: [512, 512, 512]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm
# NOTE(review): ctrl_qsm.yaml lists the next three keys before `diffusion:`;
# confirm they are meant to sit at the same level here rather than inside it.
num_noises: 20
linear: false
ranking: true

norm_obs: true
49 changes: 49 additions & 0 deletions examples/online/config/dmc/algo/ctrl_qsm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# @package _global_

# Settings for the `ctrl_qsm` algorithm in the dmc example configs.
algo:
name: ctrl_qsm
actor_update_freq: 1
target_update_freq: 1
discount: 0.99
ema: 0.005
# critic_hidden_dims: [512, 512, 512] # not used; the active value is set further down
critic_activation: elu # not used
critic_ensemble_size: 2
layer_norm: true
critic_lr: 0.0003
clip_grad_norm: null

# below are the feature-learning params added by ctrl_qsm (absent from plain qsm.yaml)
feature_dim: 512
feature_lr: 0.0001
feature_ema: 0.005
phi_hidden_dims: [512, 512]
mu_hidden_dims: [512, 512]
critic_hidden_dims: [512, ]
reward_hidden_dims: [512, ]
rff_dim: 1024
ctrl_coef: 1.0
reward_coef: 1.0
back_critic_grad: false
critic_coef: 1.0

num_noises: 25
linear: false
ranking: true

num_samples: 10
temp: 0.1
diffusion:
time_dim: 64
mlp_hidden_dims: [512, 512, 512]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm

norm_obs: true
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# @package _global_

algo:
name: ctrl_td3
name: ctrlsr_td3
actor_update_freq: 1
target_update_freq: 1
discount: 0.99
Expand Down
51 changes: 51 additions & 0 deletions examples/online/config/dmc/algo/diffsr_aca.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# @package _global_

# Settings for the `diffsr_aca` algorithm in the dmc example configs.
algo:
name: diffsr_aca
actor_update_freq: 1
target_update_freq: 1
discount: 0.99
ema: 0.005
actor_hidden_dims: [512, 512, 512]
# critic_hidden_dims: [512, 512, 512] # not used; the active value is set further down
activation: elu # not used
critic_ensemble_size: 2
layer_norm: true
actor_lr: 0.0003
critic_lr: 0.0003
clip_grad_norm: null
target_policy_noise: 0.2
noise_clip: 0.3
exploration_noise: 0.2

# below are the diffsr feature params (same keys as diffsr_td3.yaml, plus num_samples)
num_noises: 20
num_samples: 10
feature_dim: 512
feature_lr: 0.0001
feature_ema: 0.005
embed_dim: 128
phi_hidden_dims: [512, 512, 512]
mu_hidden_dims: [512, 512, 512]
critic_hidden_dims: [512, ]
reward_hidden_dims: [512, ]
rff_dim: 1024
ddpm_coef: 1.0
reward_coef: 0.1
back_critic_grad: false
critic_coef: 1.0

diffusion:
time_dim: 64
mlp_hidden_dims: [512, 512, 512]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm

norm_obs: true
51 changes: 51 additions & 0 deletions examples/online/config/dmc/algo/diffsr_aca_sep.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# @package _global_

# Settings for the `diffsr_aca_sep` algorithm in the dmc example configs.
# NOTE(review): every value below matches diffsr_aca.yaml; only `name` differs.
# Confirm the `_sep` variant needs no other overrides.
algo:
name: diffsr_aca_sep
actor_update_freq: 1
target_update_freq: 1
discount: 0.99
ema: 0.005
actor_hidden_dims: [512, 512, 512]
# critic_hidden_dims: [512, 512, 512] # not used; the active value is set further down
activation: elu # not used
critic_ensemble_size: 2
layer_norm: true
actor_lr: 0.0003
critic_lr: 0.0003
clip_grad_norm: null
target_policy_noise: 0.2
noise_clip: 0.3
exploration_noise: 0.2

# below are the diffsr feature params (same keys as diffsr_td3.yaml, plus num_samples)
num_noises: 20
num_samples: 10
feature_dim: 512
feature_lr: 0.0001
feature_ema: 0.005
embed_dim: 128
phi_hidden_dims: [512, 512, 512]
mu_hidden_dims: [512, 512, 512]
critic_hidden_dims: [512, ]
reward_hidden_dims: [512, ]
rff_dim: 1024
ddpm_coef: 1.0
reward_coef: 0.1
back_critic_grad: false
critic_coef: 1.0

diffusion:
time_dim: 64
mlp_hidden_dims: [512, 512, 512]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm

norm_obs: true
47 changes: 47 additions & 0 deletions examples/online/config/dmc/algo/diffsr_qsm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# @package _global_

# Settings for the `diffsr_qsm` algorithm in the dmc example configs.
algo:
name: diffsr_qsm
actor_update_freq: 1
target_update_freq: 1
discount: 0.99
ema: 0.005
# critic_hidden_dims: [512, 512, 512] # not used; the active value is set further down
critic_activation: elu # not used
critic_ensemble_size: 2
layer_norm: true
critic_lr: 0.0003
clip_grad_norm: null

# below are the diffsr feature params (same keys as diffsr_td3.yaml)
num_noises: 50
feature_dim: 512
feature_lr: 0.0001
feature_ema: 0.005
embed_dim: 256
phi_hidden_dims: [512, 512, 512]
mu_hidden_dims: [512, 512, 512]
critic_hidden_dims: [512, ]
reward_hidden_dims: [512, ]
rff_dim: 1024
ddpm_coef: 1.0
reward_coef: 0.1
back_critic_grad: false
critic_coef: 1.0

num_samples: 10
temp: 0.1
diffusion:
time_dim: 64
mlp_hidden_dims: [512, 512, 512]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm

norm_obs: true
37 changes: 37 additions & 0 deletions examples/online/config/dmc/algo/diffsr_td3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# @package _global_

# Settings for the `diffsr_td3` algorithm in the dmc example configs.
algo:
name: diffsr_td3
actor_update_freq: 1
target_update_freq: 1
discount: 0.99
ema: 0.005
actor_hidden_dims: [512, 512, 512]
# critic_hidden_dims: [512, 512, 512] # not used; the active value is set further down
activation: elu # not used
critic_ensemble_size: 2
layer_norm: true
actor_lr: 0.0003
critic_lr: 0.0003
clip_grad_norm: null
target_policy_noise: 0.2
noise_clip: 0.3
exploration_noise: 0.2

# below are params specific to diffsr_td3
num_noises: 50
feature_dim: 512
feature_lr: 0.0001
feature_ema: 0.005
embed_dim: 128
phi_hidden_dims: [512, 512, 512]
mu_hidden_dims: [512, 512, 512]
critic_hidden_dims: [512, ]
reward_hidden_dims: [512, ]
rff_dim: 1024
ddpm_coef: 1.0
reward_coef: 0.1
back_critic_grad: false
critic_coef: 1.0

norm_obs: true
23 changes: 23 additions & 0 deletions examples/online/config/dmc/algo/qsm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# @package _global_

# Settings for the `qsm` algorithm in the dmc example configs.
algo:
name: qsm
critic_hidden_dims: [512, 512, 512]
critic_activation: elu
critic_lr: 0.0003
discount: 0.99
num_samples: 10
ema: 0.005
temp: 0.1
diffusion:
time_dim: 64
mlp_hidden_dims: [512, 512, 512]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm
24 changes: 24 additions & 0 deletions examples/online/config/dmc/algo/sdac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# @package _global_

# Settings for the `sdac` algorithm in the dmc example configs.
algo:
name: sdac
critic_hidden_dims: [512, 512, 512]
critic_activation: elu
critic_lr: 0.0003
discount: 0.99
num_samples: 10
num_reverse_samples: 500
ema: 0.005
temp: 0.05
diffusion:
time_dim: 64
mlp_hidden_dims: [512, 512, 512]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm
24 changes: 24 additions & 0 deletions examples/online/config/mujoco/algo/alac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# @package _global_

# Settings for the `alac` algorithm in the mujoco example configs.
algo:
name: alac
discount: 0.99
num_samples: 10
ema: 0.005
ld:
resnet: false
activation: relu
ensemble_size: 2
time_dim: 64
hidden_dims: [512, 512]
cond_hidden_dims: [128, 128]
steps: 20
step_size: 0.05
noise_scale: 1.0
noise_schedule: "none"
clip_sampler: true
x_min: -1.0
x_max: 1.0
epsilon: 0.001
lr: 0.0003
clip_grad_norm: null
23 changes: 23 additions & 0 deletions examples/online/config/mujoco/algo/idem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# @package _global_

# Settings for the `idem` algorithm in the mujoco example configs.
algo:
name: idem
critic_hidden_dims: [256, 256]
critic_lr: 0.0003
discount: 0.99
num_samples: 10
num_reverse_samples: 500
ema: 0.005
temp: 0.2
diffusion:
time_dim: 64
mlp_hidden_dims: [256, 256]
lr: 0.0003
end_lr: null
lr_decay_steps: null
lr_decay_begin: null
steps: 20
clip_sampler: true
x_min: -1.0
x_max: 1.0
solver: ddpm
Loading