typoverflow
diff --git a/examples/online/config/dmc/algo/aca.yaml b/examples/online/config/dmc/algo/aca.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/examples/toy2d/config/algo/aca.yaml b/examples/toy2d/config/algo/aca.yaml
Lines changed: 40 additions & 0 deletions
diff --git a/examples/toy2d/config/algo/qsm.yaml b/examples/toy2d/config/algo/qsm.yaml
Lines changed: 23 additions & 0 deletions
diff --git a/examples/toy2d/config/algo/sdac.yaml b/examples/toy2d/config/algo/sdac.yaml
Lines changed: 23 additions & 0 deletions
diff --git a/examples/toy2d/main_toy2d.py b/examples/toy2d/main_toy2d.py
Lines changed: 13 additions & 9 deletions
diff --git a/examples/toy2d/utils.py b/examples/toy2d/utils.py
Lines changed: 47 additions & 1 deletion
@@ -38,4 +38,4 @@ algo:
   linear: false
   ranking: true
 
-norm_obs: true
+# norm_obs: true
@@ -0,0 +1,40 @@
+# @package _global_
+
+algo:
+  name: aca
+  target_update_freq: 1
+  feature_dim: 512
+  rff_dim: 1024
+  critic_hidden_dims: [512, 512]
+  reward_hidden_dims: [512, 512]
+  phi_hidden_dims: [512, 512]
+  mu_hidden_dims: [512, 512]
+  ctrl_coef: 1.0
+  reward_coef: 1.0
+  critic_coef: 1.0
+  critic_activation: elu  # not used
+  back_critic_grad: false
+  feature_lr: 0.0001
+  critic_lr: 0.0003
+  discount: 0.99
+  num_samples: 10
+  ema: 0.005
+  feature_ema: 0.005
+  clip_grad_norm: null
+  temp: 0.2
+  diffusion:
+    time_dim: 64
+    mlp_hidden_dims: [512, 512, 512]
+    lr: 0.0003
+    end_lr: null
+    lr_decay_steps: null
+    lr_decay_begin: null
+    steps: 20
+    clip_sampler: true
+    x_min: -5.0
+    x_max: 5.0
+    # solver: ddpm
+    solver: ddpm
+  num_noises: 25
+  linear: false
+  ranking: true
@@ -0,0 +1,23 @@
+# @package _global_
+
+algo:
+  name: qsm
+  critic_hidden_dims: [512, 512, 512]
+  critic_activation: elu
+  critic_lr: 0.0003
+  discount: 0.99
+  num_samples: 10
+  ema: 0.005
+  temp: 0.2
+  diffusion:
+    time_dim: 64
+    mlp_hidden_dims: [512, 512, 512]
+    lr: 0.0003
+    end_lr: null
+    lr_decay_steps: null
+    lr_decay_begin: null
+    steps: 20
+    clip_sampler: true
+    x_min: -5.0
+    x_max: 5.0
+    solver: ddpm
@@ -0,0 +1,23 @@
+# @package _global_
+
+algo:
+  name: sdac
+  critic_hidden_dims: [256, 256]
+  critic_lr: 0.0003
+  discount: 0.99
+  num_samples: 10
+  num_reverse_samples: 500
+  ema: 0.005
+  temp: 0.2
+  diffusion:
+    time_dim: 64
+    mlp_hidden_dims: [256, 256]
+    lr: 0.0003
+    end_lr: null
+    lr_decay_steps: null
+    lr_decay_begin: null
+    steps: 20
+    clip_sampler: false
+    x_min: -1.0
+    x_max: 1.0
+    solver: ddpm
@@ -3,10 +3,10 @@
 
 import hydra
 import omegaconf
-import wandb
 from omegaconf import OmegaConf
 from tqdm import trange
 
+import wandb
 from examples.toy2d.utils import compute_metrics, plot_data, plot_energy, plot_sample
 from flowrl.agent.offline import *
 from flowrl.agent.online import *
@@ -19,6 +19,9 @@
+# Registry of agent classes keyed by a short algorithm name
+# (presumably matched against cfg.algo.name -- confirm at the lookup site).
 SUPPORTED_AGENTS: Dict[str, Type[BaseAgent]] = {
     "bdpo": BDPOAgent,
     "dac": DACAgent,
+    "qsm": QSMAgent,
+    "sdac": SDACAgent,
+    "aca": ACAAgent,
 }
 
 class Trainer():
@@ -30,14 +33,15 @@ def __init__(self, cfg: Config):
             log_dir="/".join([cfg.log.dir, cfg.algo.name, cfg.log.tag, cfg.task]),
             name="seed"+str(cfg.seed),
             logger_config={
-                "TensorboardLogger": {"activate": True},
-                "WandbLogger": {
-                    "activate": True,
-                    "config": OmegaConf.to_container(cfg),
-                    "settings": wandb.Settings(_disable_stats=True),
-                    "project": cfg.log.project,
-                    "entity": cfg.log.entity
-                } if ("project" in cfg.log and "entity" in cfg.log) else {"activate": False},
+                "CsvLogger": {"activate": True},
+                # "TensorboardLogger": {"activate": True},
+                # "WandbLogger": {
+                #     "activate": True,
+                #     "config": OmegaConf.to_container(cfg),
+                #     "settings": wandb.Settings(_disable_stats=True),
+                #     "project": cfg.log.project,
+                #     "entity": cfg.log.entity
+                # } if ("project" in cfg.log and "entity" in cfg.log) else {"activate": False},
             }
         )
         self.ckpt_save_dir = os.path.join(self.logger.log_dir, "ckpt")
 
@@ -9,6 +9,7 @@
 
 from flowrl.agent.base import BaseAgent
 from flowrl.agent.offline import *
+from flowrl.agent.online import *
 from flowrl.dataset.toy2d import Toy2dDataset, inf_train_gen
 
 SAMPLE_GRAPH_SIZE = 2000
@@ -108,6 +109,9 @@ def plot_energy(out_dir, task: str, agent: BaseAgent):
     vmin = e.min()
     vmax = e.max()
 
+    def default_plot():
+        """Fallback for agent types that have no dedicated value plot.
+
+        Nothing is drawn; a notice is written so the missing plot is not silent.
+        """
+        tqdm.write(f"Plotting for {type(agent)} is not implemented, skipping")
+
     def bdpo_plot():
         tt = [0, 1, 3, 5, 10, 20, 30, 40, 50]
         plt.figure(figsize=(30, 3.0))
@@ -168,12 +172,54 @@ def dac_plot():
         plt.close()
         tqdm.write(f"Saved value plot to {saveto}")
 
+    def aca_plot():
+        """Save a row of value heatmaps for an ACA agent to ``qt_space.png``.
+
+        The ``t=0`` panel evaluates ``agent.critic_target``; every other panel
+        evaluates ``agent.value_target`` with ``t`` passed as a constant extra
+        input. Outputs are averaged over axis 0 (presumably the ensemble axis --
+        TODO confirm) and reshaped to a 90x90 grid. All panels share the
+        ``vmin``/``vmax`` colour range and a single colorbar.
+        """
+        tt = [0, 1, 3, 5, 10, 20]
+        tick_pos = [5, 25, 45, 65, 85]
+        tick_labels = [-4, -2, 0, 2, 4]
+        plt.figure(figsize=(20, 3.0))
+        axes = []
+        for i, t in enumerate(tt):
+            plt.subplot(1, len(tt), i+1)
+            if t == 0:
+                c = agent.critic_target(zero, id_matrix).mean(axis=0).reshape(90, 90)
+            else:
+                t_input = np.ones((90*90, 1)) * t
+                c = agent.value_target(zero, id_matrix, t_input).mean(axis=0).reshape(90, 90)
+            plt.gca().set_aspect("equal", adjustable="box")
+            plt.xlim(0, 89)
+            plt.ylim(0, 89)
+            image = plt.imshow(
+                c, origin="lower", vmin=vmin, vmax=vmax,
+                cmap="viridis", rasterized=True
+            )
+            if i == 0:
+                # The shared colorbar is built from the first panel, which is
+                # also the only panel that shows y tick labels.
+                mappable = image
+                plt.yticks(ticks=tick_pos, labels=tick_labels)
+            else:
+                plt.yticks(ticks=tick_pos, labels=[None] * len(tick_pos))
+
+            axes.append(plt.gca())
+            plt.xticks(ticks=tick_pos, labels=tick_labels)
+            plt.title(f't={t}')
+        plt.tight_layout()
+        plt.gcf().colorbar(mappable, ax=axes, fraction=0.1, pad=0.02, aspect=12)
+        # The colorbar axes is expected to be the last axes on the figure.
+        plt.gcf().axes[-1].yaxis.set_major_formatter(matplotlib.ticker.FormatStrFormatter('%.1f'))
+        saveto = os.path.join(out_dir, "qt_space.png")
+        plt.savefig(saveto, dpi=300)
+        plt.close()
+        tqdm.write(f"Saved value plot to {saveto}")
+
     if isinstance(agent, BDPOAgent):
         bdpo_plot()
     elif isinstance(agent, DACAgent):
         dac_plot()
+    elif isinstance(agent, ACAAgent):
+        aca_plot()
     else:
-        raise NotImplementedError(f"Plotting for {type(agent)} is not implemented")
+        default_plot()