qsm scripts

echen333 · echen333 · commit 668b38f0395b · 2025-10-09T19:44:37.000-04:00
diff --git a/examples/online/config/dmc/algo/qsm.yaml b/examples/online/config/dmc/algo/qsm.yaml
@@ -0,0 +1,22 @@
+# @package _global_
+
+algo:
+  name: qsm
+  critic_hidden_dims: [512, 512, 512]
+  critic_lr: 0.0003
+  discount: 0.99
+  num_samples: 10
+  ema: 0.005
+  temp: 0.2
+  diffusion:
+    time_dim: 64
+    mlp_hidden_dims: [512, 512, 512]
+    lr: 0.0003
+    end_lr: null
+    lr_decay_steps: null
+    lr_decay_begin: null
+    steps: 20
+    clip_sampler: true
+    x_min: -1.0
+    x_max: 1.0
+    solver: ddpm
diff --git a/examples/online/main_dmc_offpolicy.py b/examples/online/main_dmc_offpolicy.py
@@ -23,6 +23,7 @@
     "td7": TD7Agent,
     "sdac": SDACAgent,
     "dpmd": DPMDAgent,
+    "qsm": QSMAgent,
 }
 
 class OffPolicyTrainer():
diff --git a/flowrl/agent/online/__init__.py b/flowrl/agent/online/__init__.py
@@ -1,5 +1,6 @@
 from ..base import BaseAgent
 from .dpmd import DPMDAgent
+from .idem import IDEMAgent
 from .ppo import PPOAgent
 from .qsm import QSMAgent
 from .sac import SACAgent
@@ -16,4 +17,5 @@
     "DPMDAgent",
     "PPOAgent",
     "QSMAgent",
+    "IDEMAgent"
 ]
diff --git a/scripts/dmc/qsm.sh b/scripts/dmc/qsm.sh
@@ -0,0 +1,68 @@
+# Specify which GPUs to use
+GPUS=(0 1 2 3 4 5 6 7)  # Modify this array to specify which GPUs to use
+SEEDS=(0 1 2 3 4)
+NUM_EACH_GPU=3
+
+PARALLEL=$((NUM_EACH_GPU * ${#GPUS[@]}))
+
+TASKS=(
+    "acrobot-swingup"
+    "ball_in_cup-catch"
+    "cartpole-balance"
+    "cartpole-balance_sparse"
+    "cartpole-swingup"
+    "cartpole-swingup_sparse"
+    "cheetah-run"
+    "dog-run"
+    "dog-stand"
+    "dog-trot"
+    "dog-walk"
+    "finger-spin"
+    "finger-turn_easy"
+    "finger-turn_hard"
+    "fish-swim"
+    "hopper-hop"
+    "hopper-stand"
+    "humanoid-run"
+    "humanoid-stand"
+    "humanoid-walk"
+    "pendulum-swingup"
+    "quadruped-run"
+    "quadruped-walk"
+    "reacher-easy"
+    "reacher-hard"
+    "walker-run"
+    "walker-stand"
+    "walker-walk"
+)
+
+SHARED_ARGS=(
+    "algo=qsm"
+    "log.tag=default"
+    "log.project=flow-rl"
+    "log.entity=lamda-rl"
+)
+
+run_task() {
+    task=$1
+    seed=$2
+    slot=$3
+    num_gpus=${#GPUS[@]}
+    device_idx=$((slot % num_gpus))
+    device=${GPUS[$device_idx]}
+    echo "Running $env $seed on GPU $device"
+    command="python3 examples/online/main_dmc_offpolicy.py task=$task device=$device seed=$seed ${SHARED_ARGS[@]}"
+    if [ -n "$DRY_RUN" ]; then
+        echo $command
+    else
+        echo $command
+        $command
+    fi
+}
+
+. env_parallel.bash  # defines env_parallel, used below to fan run_task out over every task x seed pair
+if [ -n "$DRY_RUN" ]; then
+    env_parallel -P${PARALLEL} run_task {1} {2} {%} ::: ${TASKS[@]} ::: ${SEEDS[@]}
+else  # NOTE(review): $name is not assigned anywhere in this script; unless it comes from the environment, --results gets log/parallel/ -- confirm
+    env_parallel --bar --results log/parallel/$name -P${PARALLEL} run_task {1} {2} {%} ::: ${TASKS[@]} ::: ${SEEDS[@]}
+fi

Original file line number	Diff line number	Diff line change
`@@ -23,6 +23,7 @@`
`23`	`23`	`"td7": TD7Agent,`
`24`	`24`	`"sdac": SDACAgent,`
`25`	`25`	`"dpmd": DPMDAgent,`
	`26`	`+ "qsm": QSMAgent,`
`26`	`27`	`}`
`27`	`28`
`28`	`29`	`class OffPolicyTrainer():`